diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go index 3a21eac58e..35fb76fe5b 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go @@ -507,18 +507,18 @@ type SessionStorageConfig struct { // // +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured" // -//nolint:lll // CEL validation rules exceed line length limit +//nolint:lll // kubebuilder marker exceeds line length type RateLimitConfig struct { // Shared is a token bucket shared across all users for the entire server. // +optional - Shared *RateLimitBucket `json:"shared,omitempty"` + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` // PerUser is a token bucket applied independently to each authenticated user // at the server level. Requires authentication to be enabled. // Each unique userID creates Redis keys that expire after 2x refillPeriod. // Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. // +optional - PerUser *RateLimitBucket `json:"perUser,omitempty"` + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` // Tools defines per-tool rate limit overrides. // Each entry applies additional rate limits to calls targeting a specific tool name. @@ -526,24 +526,24 @@ type RateLimitConfig struct { // +listType=map // +listMapKey=name // +optional - Tools []ToolRateLimitConfig `json:"tools,omitempty"` + Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"` } // RateLimitBucket defines a token bucket configuration with a maximum capacity -// and a refill period. Used by both shared (global) and per-user rate limits. +// and a refill period. Used by both shared and per-user rate limits. type RateLimitBucket struct { // MaxTokens is the maximum number of tokens (bucket capacity). // This is also the burst size: the maximum number of requests that can be served // instantaneously before the bucket is depleted. // +kubebuilder:validation:Required // +kubebuilder:validation:Minimum=1 - MaxTokens int32 `json:"maxTokens"` + MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"` // RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. // The effective refill rate is maxTokens / refillPeriod tokens per second. // Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). // +kubebuilder:validation:Required - RefillPeriod metav1.Duration `json:"refillPeriod"` + RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"` } // ToolRateLimitConfig defines rate limits for a specific tool. @@ -556,15 +556,15 @@ type ToolRateLimitConfig struct { // Name is the MCP tool name this limit applies to. // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Name string `json:"name"` + Name string `json:"name" yaml:"name"` // Shared token bucket for this specific tool. // +optional - Shared *RateLimitBucket `json:"shared,omitempty"` + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` // PerUser token bucket configuration for this tool. // +optional - PerUser *RateLimitBucket `json:"perUser,omitempty"` + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` } // Permission profile types diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go index 0e69f33836..4a886bb74f 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go @@ -116,6 +116,44 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) { } } +func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) { + t.Parallel() + + spec := VirtualMCPServerSpec{ + IncomingAuth: &IncomingAuthConfig{Type: "oidc"}, + GroupRef: &MCPGroupRef{Name: "group-a"}, + SessionStorage: &SessionStorageConfig{ + Provider: "redis", + Address: "redis.default.svc.cluster.local:6379", + }, + RateLimiting: &RateLimitConfig{ + Shared: &RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + PerUser: &RateLimitBucket{ + MaxTokens: 2, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Shared: &RateLimitBucket{ + MaxTokens: 5, + RefillPeriod: metav1.Duration{Duration: 30 * time.Second}, + }, + }, + }, + }, + } + + b, err := json.Marshal(spec) + require.NoError(t, err) + out := string(b) + assert.Contains(t, out, `"rateLimiting"`) + assert.Contains(t, out, `"shared"`) + assert.Contains(t, out, `"perUser"`) + assert.Contains(t, out, `"backend_a_echo"`) + assert.NotContains(t, out, `"config":{"rateLimiting"`) +} + func TestMCPServerSpecScalingFieldsJSONRoundtrip(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go index c63139b133..a23f2feaaa 100644 --- a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go @@ -16,6 +16,10 @@ import ( // VirtualMCPServerSpec defines the desired state of VirtualMCPServer // +// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider == 'redis')",message="rateLimiting requires sessionStorage with provider 'redis'" +// +kubebuilder:validation:XValidation:rule="!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="rateLimiting.perUser requires incomingAuth.type oidc" +// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="per-tool perUser rate limiting requires incomingAuth.type oidc" +// //nolint:lll // CEL validation rules exceed line length limit type VirtualMCPServerSpec struct { // IncomingAuth configures authentication for clients connecting to the Virtual MCP server. @@ -143,6 +147,11 @@ type VirtualMCPServerSpec struct { // +listType=atomic // +optional ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` + + // RateLimiting defines rate limiting configuration for the Virtual MCP server. + // Requires Redis session storage to be configured for distributed rate limiting. + // +optional + RateLimiting *RateLimitConfig `json:"rateLimiting,omitempty"` } // EmbeddingServerRef references an existing EmbeddingServer resource by name. diff --git a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go index 04da44756d..6a58686950 100644 --- a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go @@ -3008,6 +3008,11 @@ func (in *VirtualMCPServerSpec) DeepCopyInto(out *VirtualMCPServerSpec) { *out = make([]corev1.LocalObjectReference, len(*in)) copy(*out, *in) } + if in.RateLimiting != nil { + in, out := &in.RateLimiting, &out.RateLimiting + *out = new(RateLimitConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMCPServerSpec. diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go index 5d0fe5efde..2da5a24a45 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go @@ -507,6 +507,11 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { assert.Equal(t, "test-vmcp-vmcp-config", cm.Name) assert.Contains(t, cm.Data, "config.yaml") assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"]) + + var cfg vmcpconfig.Config + require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg)) + assert.Equal(t, "test-vmcp", cfg.Name) + assert.Equal(t, "test-group", cfg.Group) } // TestSetAuthConfigConditions tests that auth config conditions reflect the current state diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go index b91f9d021d..c4a3484bd2 100644 --- a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go +++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go @@ -466,6 +466,7 @@ var _ = Describe("EmbeddingServer Controller Update Tests", func() { Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed()) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &appsv1.StatefulSet{})).To(Succeed()) + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &corev1.Service{})).To(Succeed()) }, timeout, interval).Should(Succeed()) }) diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go index 45b6043196..6a74efd2aa 100644 --- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go +++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go @@ -5,6 +5,8 @@ package controllers import ( + "time" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -106,4 +108,60 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer", Expect(err).To(HaveOccurred()) }) }) + + Context("rateLimiting", func() { + It("should reject rate limiting without redis session storage", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-no-redis", nil) + vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ + Shared: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("rateLimiting requires sessionStorage with provider 'redis'")) + }) + + It("should reject perUser rate limiting with anonymous auth", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-anon", &mcpv1beta1.SessionStorageConfig{ + Provider: "redis", + Address: "redis:6379", + }) + vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ + PerUser: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("rateLimiting.perUser requires incomingAuth.type oidc")) + }) + + It("should accept perUser rate limiting with oidc auth and redis session storage", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-oidc", &mcpv1beta1.SessionStorageConfig{ + Provider: "redis", + Address: "redis:6379", + }) + vmcp.Spec.IncomingAuth = &mcpv1beta1.IncomingAuthConfig{ + Type: "oidc", + OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{ + Name: "oidc", + Audience: "test-audience", + }, + } + vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{ + PerUser: &mcpv1beta1.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).NotTo(HaveOccurred()) + }) + }) }) diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml index a51fe4b5bd..913eb6ca9e 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -2242,6 +2242,131 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -2347,6 +2472,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: @@ -4738,6 +4874,131 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -4843,6 +5104,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml index 6078670479..1429da0fbe 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -2245,6 +2245,131 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -2350,6 +2475,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: @@ -4741,6 +4877,131 @@ spec: This field accepts a PodTemplateSpec object as JSON/YAML. type: object x-kubernetes-preserve-unknown-fields: true + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for this + tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) replicas: description: |- Replicas is the desired number of vMCP pod replicas. @@ -4846,6 +5107,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider + == ''redis'')' + - message: rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || + (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, + !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type + == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index b6e0c50cda..9edb64f5ba 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -2687,7 +2687,7 @@ _Appears in:_ RateLimitBucket defines a token bucket configuration with a maximum capacity -and a refill period. Used by both shared (global) and per-user rate limits. +and a refill period. Used by both shared and per-user rate limits. @@ -2712,6 +2712,7 @@ At least one of shared, perUser, or tools must be configured. _Appears in:_ - [api.v1beta1.MCPServerSpec](#apiv1beta1mcpserverspec) +- [api.v1beta1.VirtualMCPServerSpec](#apiv1beta1virtualmcpserverspec) | Field | Description | Default | Validation | | --- | --- | --- | --- | @@ -3439,6 +3440,7 @@ _Appears in:_ | `replicas` _integer_ | Replicas is the desired number of vMCP pod replicas.
VirtualMCPServer creates a single Deployment for the vMCP aggregator process,
so there is only one replicas field (unlike MCPServer which has separate
Replicas and BackendReplicas for its two Deployments).
When nil, the operator does not set Deployment.Spec.Replicas, leaving replica
management to an HPA or other external controller. | | Minimum: 0
Optional: \{\}
| | `sessionStorage` _[api.v1beta1.SessionStorageConfig](#apiv1beta1sessionstorageconfig)_ | SessionStorage configures session storage for stateful horizontal scaling.
When nil, no session storage is configured. | | Optional: \{\}
| | `imagePullSecrets` _[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#localobjectreference-v1-core) array_ | ImagePullSecrets allows specifying image pull secrets for the vMCP workload.
These are applied to both the vMCP Deployment's PodSpec.ImagePullSecrets
and to the operator-managed ServiceAccount the vMCP server runs as, so private
images are pullable through either path.
Merge semantics with PodTemplateSpec:
The deployed PodSpec.ImagePullSecrets is the Kubernetes-native strategic-merge
union of this field and spec.podTemplateSpec.spec.imagePullSecrets, merged by
the patchStrategy:"merge" / patchMergeKey:"name" tags on corev1.PodSpec.
- This field is rendered first as the controller-generated default.
- spec.podTemplateSpec.spec.imagePullSecrets is then strategic-merge-patched
on top, keyed by Name. Distinct names from the two sources are unioned in
the resulting list; entries with the same Name are deduplicated and the
PodTemplateSpec entry wins on overlap (user override).
- Order in the resulting list is not guaranteed and should not be relied on:
strategic merge by name is order-insensitive.
- The operator-managed ServiceAccount's imagePullSecrets list is populated
ONLY from this field. spec.podTemplateSpec.spec.imagePullSecrets does not
reach the ServiceAccount because PodTemplateSpec has no notion of a
ServiceAccount. To make a secret usable via the ServiceAccount path
(e.g. for sidecars or init containers that pull images independently),
list it here rather than under spec.podTemplateSpec.
Note on cross-CRD consistency:
MCPRegistry currently uses an atomic-replace strategy for its imagePullSecrets
(the user-provided value replaces the controller-generated list rather than
being merged on top). VirtualMCPServer follows the Kubernetes-native
strategic-merge-by-name behavior described above. Aligning the two is tracked
as a separate follow-up; until then, manifests that set imagePullSecrets on
both CRDs will see different override behavior between them. | | Optional: \{\}
| +| `rateLimiting` _[api.v1beta1.RateLimitConfig](#apiv1beta1ratelimitconfig)_ | RateLimiting defines rate limiting configuration for the Virtual MCP server.
Requires Redis session storage to be configured for distributed rate limiting. | | Optional: \{\}
| #### api.v1beta1.VirtualMCPServerStatus diff --git a/pkg/ratelimit/internal/bucket/bucket.go b/pkg/ratelimit/internal/bucket/bucket.go index 28903bcdcd..d68a1709c8 100644 --- a/pkg/ratelimit/internal/bucket/bucket.go +++ b/pkg/ratelimit/internal/bucket/bucket.go @@ -90,7 +90,7 @@ type TokenBucket struct { } // New creates a TokenBucket. The Redis key is derived from namespace, server -// name, and suffix (e.g., "global" or "global:tool:search"). +// name, and suffix (e.g., "shared" or "shared:tool:search"). func New(namespace, serverName, suffix string, maxTokens int32, refillPeriod time.Duration) *TokenBucket { refillSec := refillPeriod.Seconds() return &TokenBucket{ diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go index a007191d3c..147e22cd7e 100644 --- a/pkg/ratelimit/limiter_test.go +++ b/pkg/ratelimit/limiter_test.go @@ -70,7 +70,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) { assert.Contains(t, err.Error(), "refillPeriod must be positive") } -func TestLimiter_ServerGlobalExhausted(t *testing.T) { +func TestLimiter_ServerSharedExhausted(t *testing.T) { t.Parallel() client, _ := newTestClient(t) ctx := t.Context() @@ -93,6 +93,35 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) { assert.Greater(t, d.RetryAfter, time.Duration(0)) } +func TestLimiter_SharedUsesRedisKeys(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + Tools: []v1beta1.ToolRateLimitConfig{ + { + Name: "search", + Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + }, + }, + } + l, err := NewLimiter(client, "ns", "srv", crd) + require.NoError(t, err) + + d, err := l.Allow(ctx, "search", "") + require.NoError(t, err) + require.True(t, d.Allowed) + + serverKey := "thv:rl:{ns:srv}:shared" + toolKey := "thv:rl:{ns:srv}:shared:tool:search" + + exists, err := client.Exists(ctx, serverKey, toolKey).Result() + require.NoError(t, err) + assert.Equal(t, int64(2), exists) +} + func TestLimiter_PerToolIsolation(t *testing.T) { t.Parallel() client, _ := newTestClient(t) diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go index 12cebf47d2..61a24c86be 100644 --- a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go +++ b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go @@ -10,6 +10,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -460,9 +461,30 @@ var _ = Describe("VirtualMCPServer Circuit Breaker Lifecycle", Ordered, func() { backend.Spec.Image = images.YardstickServerImage Expect(k8sClient.Update(ctx, backend)).To(Succeed()) + By("Waiting for backend StatefulSet template to use the fixed image") + Eventually(func() error { + sts := &appsv1.StatefulSet{} + if err := k8sClient.Get(ctx, types.NamespacedName{ + Name: backend2Name, + Namespace: testNamespace, + }, sts); err != nil { + return err + } + for _, container := range sts.Spec.Template.Spec.Containers { + if container.Name == "mcp" { + if container.Image != images.YardstickServerImage { + return fmt.Errorf("statefulset still has image %q", container.Image) + } + return nil + } + } + return fmt.Errorf("mcp container not found in statefulset template") + }, timeout, pollingInterval).Should(Succeed()) + By("Deleting stuck pods to force recreation with fixed image") // Pods in ImagePullBackOff don't automatically recreate when image is fixed - // Delete them to force the statefulset to create new pods with the correct image + // Delete them after the statefulset template is updated, otherwise the old template + // can immediately recreate the pod with the broken image again. podList := &corev1.PodList{} Expect(k8sClient.List(ctx, podList, client.InNamespace(testNamespace),