diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go
index 3a21eac58e..35fb76fe5b 100644
--- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go
+++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go
@@ -507,18 +507,18 @@ type SessionStorageConfig struct {
 //
 // +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured"
 //
-//nolint:lll // CEL validation rules exceed line length limit
+//nolint:lll // kubebuilder marker exceeds line length
 type RateLimitConfig struct {
 	// Shared is a token bucket shared across all users for the entire server.
 	// +optional
-	Shared *RateLimitBucket `json:"shared,omitempty"`
+	Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"`
 
 	// PerUser is a token bucket applied independently to each authenticated user
 	// at the server level. Requires authentication to be enabled.
 	// Each unique userID creates Redis keys that expire after 2x refillPeriod.
 	// Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
 	// +optional
-	PerUser *RateLimitBucket `json:"perUser,omitempty"`
+	PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"`
 
 	// Tools defines per-tool rate limit overrides.
 	// Each entry applies additional rate limits to calls targeting a specific tool name.
@@ -526,24 +526,24 @@ type RateLimitConfig struct {
 	// +listType=map
 	// +listMapKey=name
 	// +optional
-	Tools []ToolRateLimitConfig `json:"tools,omitempty"`
+	Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"`
 }
 
 // RateLimitBucket defines a token bucket configuration with a maximum capacity
-// and a refill period. Used by both shared (global) and per-user rate limits.
+// and a refill period. Used by both shared and per-user rate limits.
 type RateLimitBucket struct {
 	// MaxTokens is the maximum number of tokens (bucket capacity).
 	// This is also the burst size: the maximum number of requests that can be served
 	// instantaneously before the bucket is depleted.
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:Minimum=1
-	MaxTokens int32 `json:"maxTokens"`
+	MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"`
 
 	// RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
 	// The effective refill rate is maxTokens / refillPeriod tokens per second.
 	// Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
 	// +kubebuilder:validation:Required
-	RefillPeriod metav1.Duration `json:"refillPeriod"`
+	RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"`
 }
 
 // ToolRateLimitConfig defines rate limits for a specific tool.
@@ -556,15 +556,15 @@ type ToolRateLimitConfig struct {
 	// Name is the MCP tool name this limit applies to.
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:MinLength=1
-	Name string `json:"name"`
+	Name string `json:"name" yaml:"name"`
 
 	// Shared token bucket for this specific tool.
 	// +optional
-	Shared *RateLimitBucket `json:"shared,omitempty"`
+	Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"`
 
 	// PerUser token bucket configuration for this tool.
 	// +optional
-	PerUser *RateLimitBucket `json:"perUser,omitempty"`
+	PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"`
 }
 
 // Permission profile types
diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go
index 0e69f33836..4a886bb74f 100644
--- a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go
+++ b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go
@@ -116,6 +116,54 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) {
 	}
 }
 
+// TestVirtualMCPServerSpecRateLimitingJSONRoundtrip verifies that rateLimiting
+// serializes as a direct field of the VirtualMCPServer spec and that the rate
+// limit configuration is unchanged after a marshal/unmarshal cycle.
+func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) {
+	t.Parallel()
+
+	spec := VirtualMCPServerSpec{
+		IncomingAuth: &IncomingAuthConfig{Type: "oidc"},
+		GroupRef:     &MCPGroupRef{Name: "group-a"},
+		SessionStorage: &SessionStorageConfig{
+			Provider: "redis",
+			Address:  "redis.default.svc.cluster.local:6379",
+		},
+		RateLimiting: &RateLimitConfig{
+			Shared: &RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+			PerUser: &RateLimitBucket{
+				MaxTokens:    2,
+				RefillPeriod: metav1.Duration{Duration: time.Minute},
+			},
+			Tools: []ToolRateLimitConfig{
+				{
+					Name: "backend_a_echo",
+					Shared: &RateLimitBucket{
+						MaxTokens:    5,
+						RefillPeriod: metav1.Duration{Duration: 30 * time.Second},
+					},
+				},
+			},
+		},
+	}
+
+	b, err := json.Marshal(spec)
+	require.NoError(t, err)
+	out := string(b)
+	assert.Contains(t, out, `"rateLimiting"`)
+	assert.Contains(t, out, `"shared"`)
+	assert.Contains(t, out, `"perUser"`)
+	assert.Contains(t, out, `"backend_a_echo"`)
+	// rateLimiting must not be emitted nested under the config object.
+	assert.NotContains(t, out, `"config":{"rateLimiting"`)
+
+	// Decode the payload again: the substring checks above only cover marshaling,
+	// so compare the decoded rate limit config with the input for a real roundtrip.
+	var decoded VirtualMCPServerSpec
+	require.NoError(t, json.Unmarshal(b, &decoded))
+	assert.Equal(t, spec.RateLimiting, decoded.RateLimiting)
+}
+
 func TestMCPServerSpecScalingFieldsJSONRoundtrip(t *testing.T) {
 	t.Parallel()
 
diff --git a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go
index c63139b133..a23f2feaaa 100644
--- a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go
+++ b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go
@@ -16,6 +16,10 @@ import (
 
 // VirtualMCPServerSpec defines the desired state of VirtualMCPServer
 //
+// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider == 'redis')",message="rateLimiting requires sessionStorage with provider 'redis'"
+// +kubebuilder:validation:XValidation:rule="!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="rateLimiting.perUser requires incomingAuth.type oidc"
+// +kubebuilder:validation:XValidation:rule="!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="per-tool perUser rate limiting requires incomingAuth.type oidc"
+//
 //nolint:lll // CEL validation rules exceed line length limit
 type VirtualMCPServerSpec struct {
 	// IncomingAuth configures authentication for clients connecting to the Virtual MCP server.
@@ -143,6 +147,11 @@ type VirtualMCPServerSpec struct {
 	// +listType=atomic
 	// +optional
 	ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"`
+
+	// RateLimiting defines rate limiting configuration for the Virtual MCP server.
+	// Requires Redis session storage to be configured for distributed rate limiting.
+	// +optional
+	RateLimiting *RateLimitConfig `json:"rateLimiting,omitempty"`
 }
 
 // EmbeddingServerRef references an existing EmbeddingServer resource by name.
diff --git a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go
index 04da44756d..6a58686950 100644
--- a/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go
+++ b/cmd/thv-operator/api/v1beta1/zz_generated.deepcopy.go
@@ -3008,6 +3008,11 @@ func (in *VirtualMCPServerSpec) DeepCopyInto(out *VirtualMCPServerSpec) {
 		*out = make([]corev1.LocalObjectReference, len(*in))
 		copy(*out, *in)
 	}
+	if in.RateLimiting != nil {
+		in, out := &in.RateLimiting, &out.RateLimiting
+		*out = new(RateLimitConfig)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualMCPServerSpec.
diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go
index 5d0fe5efde..2da5a24a45 100644
--- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go
+++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go
@@ -507,6 +507,11 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) {
 	assert.Equal(t, "test-vmcp-vmcp-config", cm.Name)
 	assert.Contains(t, cm.Data, "config.yaml")
 	assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"])
+
+	var cfg vmcpconfig.Config
+	require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg))
+	assert.Equal(t, "test-vmcp", cfg.Name)
+	assert.Equal(t, "test-group", cfg.Group)
 }
 
 // TestSetAuthConfigConditions tests that auth config conditions reflect the current state
diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
index b91f9d021d..c4a3484bd2 100644
--- a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
+++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
@@ -466,6 +466,7 @@ var _ = Describe("EmbeddingServer Controller Update Tests", func() {
 				Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed())
 				Eventually(func(g Gomega) {
 					g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &appsv1.StatefulSet{})).To(Succeed())
+					g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &corev1.Service{})).To(Succeed())
 				}, timeout, interval).Should(Succeed())
 			})
 
diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go
index 45b6043196..6a74efd2aa 100644
--- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go
+++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go
@@ -5,6 +5,8 @@
 package controllers
 
 import (
+	"time"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -106,4 +108,83 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer",
 				Expect(err).To(HaveOccurred())
 			})
 		})
+
+		// Covers the rateLimiting XValidation rules declared on VirtualMCPServerSpec.
+		Context("rateLimiting", func() {
+			It("should reject rate limiting without redis session storage", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-no-redis", nil)
+				vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{
+					Shared: &mcpv1beta1.RateLimitBucket{
+						MaxTokens:    1,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+				}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("rateLimiting requires sessionStorage with provider 'redis'"))
+			})
+
+			It("should reject perUser rate limiting with anonymous auth", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-anon", &mcpv1beta1.SessionStorageConfig{
+					Provider: "redis",
+					Address:  "redis:6379",
+				})
+				vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{
+					PerUser: &mcpv1beta1.RateLimitBucket{
+						MaxTokens:    1,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+				}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("rateLimiting.perUser requires incomingAuth.type oidc"))
+			})
+
+			It("should reject per-tool perUser rate limiting with anonymous auth", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-tool-anon", &mcpv1beta1.SessionStorageConfig{
+					Provider: "redis",
+					Address:  "redis:6379",
+				})
+				vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{
+					Tools: []mcpv1beta1.ToolRateLimitConfig{
+						{
+							Name: "echo",
+							PerUser: &mcpv1beta1.RateLimitBucket{
+								MaxTokens:    1,
+								RefillPeriod: metav1.Duration{Duration: time.Minute},
+							},
+						},
+					},
+				}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("per-tool perUser rate limiting requires incomingAuth.type oidc"))
+			})
+
+			It("should accept perUser rate limiting with oidc auth and redis session storage", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-oidc", &mcpv1beta1.SessionStorageConfig{
+					Provider: "redis",
+					Address:  "redis:6379",
+				})
+				vmcp.Spec.IncomingAuth = &mcpv1beta1.IncomingAuthConfig{
+					Type: "oidc",
+					OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{
+						Name:     "oidc",
+						Audience: "test-audience",
+					},
+				}
+				vmcp.Spec.RateLimiting = &mcpv1beta1.RateLimitConfig{
+					PerUser: &mcpv1beta1.RateLimitBucket{
+						MaxTokens:    1,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+				}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).NotTo(HaveOccurred())
+			})
+		})
 	})
diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml
index a51fe4b5bd..913eb6ca9e 100644
--- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml
+++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml
@@ -2242,6 +2242,131 @@ spec:
                   This field accepts a PodTemplateSpec object as JSON/YAML.
                 type: object
                 x-kubernetes-preserve-unknown-fields: true
+              rateLimiting:
+                description: |-
+                  RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                  Requires Redis session storage to be configured for distributed rate limiting.
+                properties:
+                  perUser:
+                    description: |-
+                      PerUser is a token bucket applied independently to each authenticated user
+                      at the server level. Requires authentication to be enabled.
+                      Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                      Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  shared:
+                    description: Shared is a token bucket shared across all users
+                      for the entire server.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  tools:
+                    description: |-
+                      Tools defines per-tool rate limit overrides.
+                      Each entry applies additional rate limits to calls targeting a specific tool name.
+                      A request must pass both the server-level limit and the per-tool limit.
+                    items:
+                      description: |-
+                        ToolRateLimitConfig defines rate limits for a specific tool.
+                        At least one of shared or perUser must be configured.
+                      properties:
+                        name:
+                          description: Name is the MCP tool name this limit applies
+                            to.
+                          minLength: 1
+                          type: string
+                        perUser:
+                          description: PerUser token bucket configuration for this
+                            tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                        shared:
+                          description: Shared token bucket for this specific tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                      required:
+                      - name
+                      type: object
+                      x-kubernetes-validations:
+                      - message: at least one of shared or perUser must be configured
+                        rule: has(self.shared) || has(self.perUser)
+                    type: array
+                    x-kubernetes-list-map-keys:
+                    - name
+                    x-kubernetes-list-type: map
+                type: object
+                x-kubernetes-validations:
+                - message: at least one of shared, perUser, or tools must be configured
+                  rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                    && size(self.tools) > 0)
               replicas:
                 description: |-
                   Replicas is the desired number of vMCP pod replicas.
@@ -2347,6 +2472,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider
+                == ''redis'')'
+            - message: rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) ||
+                (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t,
+                !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type
+                == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
@@ -4738,6 +4874,131 @@ spec:
                   This field accepts a PodTemplateSpec object as JSON/YAML.
                 type: object
                 x-kubernetes-preserve-unknown-fields: true
+              rateLimiting:
+                description: |-
+                  RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                  Requires Redis session storage to be configured for distributed rate limiting.
+                properties:
+                  perUser:
+                    description: |-
+                      PerUser is a token bucket applied independently to each authenticated user
+                      at the server level. Requires authentication to be enabled.
+                      Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                      Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  shared:
+                    description: Shared is a token bucket shared across all users
+                      for the entire server.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  tools:
+                    description: |-
+                      Tools defines per-tool rate limit overrides.
+                      Each entry applies additional rate limits to calls targeting a specific tool name.
+                      A request must pass both the server-level limit and the per-tool limit.
+                    items:
+                      description: |-
+                        ToolRateLimitConfig defines rate limits for a specific tool.
+                        At least one of shared or perUser must be configured.
+                      properties:
+                        name:
+                          description: Name is the MCP tool name this limit applies
+                            to.
+                          minLength: 1
+                          type: string
+                        perUser:
+                          description: PerUser token bucket configuration for this
+                            tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                        shared:
+                          description: Shared token bucket for this specific tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                      required:
+                      - name
+                      type: object
+                      x-kubernetes-validations:
+                      - message: at least one of shared or perUser must be configured
+                        rule: has(self.shared) || has(self.perUser)
+                    type: array
+                    x-kubernetes-list-map-keys:
+                    - name
+                    x-kubernetes-list-type: map
+                type: object
+                x-kubernetes-validations:
+                - message: at least one of shared, perUser, or tools must be configured
+                  rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                    && size(self.tools) > 0)
               replicas:
                 description: |-
                   Replicas is the desired number of vMCP pod replicas.
@@ -4843,6 +5104,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider
+                == ''redis'')'
+            - message: rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) ||
+                (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t,
+                !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type
+                == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml
index 6078670479..1429da0fbe 100644
--- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml
+++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml
@@ -2245,6 +2245,131 @@ spec:
                   This field accepts a PodTemplateSpec object as JSON/YAML.
                 type: object
                 x-kubernetes-preserve-unknown-fields: true
+              rateLimiting:
+                description: |-
+                  RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                  Requires Redis session storage to be configured for distributed rate limiting.
+                properties:
+                  perUser:
+                    description: |-
+                      PerUser is a token bucket applied independently to each authenticated user
+                      at the server level. Requires authentication to be enabled.
+                      Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                      Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  shared:
+                    description: Shared is a token bucket shared across all users
+                      for the entire server.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  tools:
+                    description: |-
+                      Tools defines per-tool rate limit overrides.
+                      Each entry applies additional rate limits to calls targeting a specific tool name.
+                      A request must pass both the server-level limit and the per-tool limit.
+                    items:
+                      description: |-
+                        ToolRateLimitConfig defines rate limits for a specific tool.
+                        At least one of shared or perUser must be configured.
+                      properties:
+                        name:
+                          description: Name is the MCP tool name this limit applies
+                            to.
+                          minLength: 1
+                          type: string
+                        perUser:
+                          description: PerUser token bucket configuration for this
+                            tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                        shared:
+                          description: Shared token bucket for this specific tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                      required:
+                      - name
+                      type: object
+                      x-kubernetes-validations:
+                      - message: at least one of shared or perUser must be configured
+                        rule: has(self.shared) || has(self.perUser)
+                    type: array
+                    x-kubernetes-list-map-keys:
+                    - name
+                    x-kubernetes-list-type: map
+                type: object
+                x-kubernetes-validations:
+                - message: at least one of shared, perUser, or tools must be configured
+                  rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                    && size(self.tools) > 0)
               replicas:
                 description: |-
                   Replicas is the desired number of vMCP pod replicas.
@@ -2350,6 +2475,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider
+                == ''redis'')'
+            - message: rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) ||
+                (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t,
+                !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type
+                == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
@@ -4741,6 +4877,131 @@ spec:
                   This field accepts a PodTemplateSpec object as JSON/YAML.
                 type: object
                 x-kubernetes-preserve-unknown-fields: true
+              rateLimiting:
+                description: |-
+                  RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                  Requires Redis session storage to be configured for distributed rate limiting.
+                properties:
+                  perUser:
+                    description: |-
+                      PerUser is a token bucket applied independently to each authenticated user
+                      at the server level. Requires authentication to be enabled.
+                      Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                      Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  shared:
+                    description: Shared is a token bucket shared across all users
+                      for the entire server.
+                    properties:
+                      maxTokens:
+                        description: |-
+                          MaxTokens is the maximum number of tokens (bucket capacity).
+                          This is also the burst size: the maximum number of requests that can be served
+                          instantaneously before the bucket is depleted.
+                        format: int32
+                        minimum: 1
+                        type: integer
+                      refillPeriod:
+                        description: |-
+                          RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                          The effective refill rate is maxTokens / refillPeriod tokens per second.
+                          Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                        type: string
+                    required:
+                    - maxTokens
+                    - refillPeriod
+                    type: object
+                  tools:
+                    description: |-
+                      Tools defines per-tool rate limit overrides.
+                      Each entry applies additional rate limits to calls targeting a specific tool name.
+                      A request must pass both the server-level limit and the per-tool limit.
+                    items:
+                      description: |-
+                        ToolRateLimitConfig defines rate limits for a specific tool.
+                        At least one of shared or perUser must be configured.
+                      properties:
+                        name:
+                          description: Name is the MCP tool name this limit applies
+                            to.
+                          minLength: 1
+                          type: string
+                        perUser:
+                          description: PerUser token bucket configuration for this
+                            tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                        shared:
+                          description: Shared token bucket for this specific tool.
+                          properties:
+                            maxTokens:
+                              description: |-
+                                MaxTokens is the maximum number of tokens (bucket capacity).
+                                This is also the burst size: the maximum number of requests that can be served
+                                instantaneously before the bucket is depleted.
+                              format: int32
+                              minimum: 1
+                              type: integer
+                            refillPeriod:
+                              description: |-
+                                RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                              type: string
+                          required:
+                          - maxTokens
+                          - refillPeriod
+                          type: object
+                      required:
+                      - name
+                      type: object
+                      x-kubernetes-validations:
+                      - message: at least one of shared or perUser must be configured
+                        rule: has(self.shared) || has(self.perUser)
+                    type: array
+                    x-kubernetes-list-map-keys:
+                    - name
+                    x-kubernetes-list-type: map
+                type: object
+                x-kubernetes-validations:
+                - message: at least one of shared, perUser, or tools must be configured
+                  rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                    && size(self.tools) > 0)
               replicas:
                 description: |-
                   Replicas is the desired number of vMCP pod replicas.
@@ -4846,6 +5107,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider
+                == ''redis'')'
+            - message: rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.rateLimiting) && has(self.rateLimiting.perUser)) ||
+                (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.rateLimiting) || !has(self.rateLimiting.tools) || self.rateLimiting.tools.all(t,
+                !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type
+                == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md
index b6e0c50cda..9edb64f5ba 100644
--- a/docs/operator/crd-api.md
+++ b/docs/operator/crd-api.md
@@ -2687,7 +2687,7 @@ _Appears in:_
 
 
 RateLimitBucket defines a token bucket configuration with a maximum capacity
-and a refill period. Used by both shared (global) and per-user rate limits.
+and a refill period. Used by both shared and per-user rate limits.
 
 
 
@@ -2712,6 +2712,7 @@ At least one of shared, perUser, or tools must be configured.
 
 _Appears in:_
 - [api.v1beta1.MCPServerSpec](#apiv1beta1mcpserverspec)
+- [api.v1beta1.VirtualMCPServerSpec](#apiv1beta1virtualmcpserverspec)
 
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
@@ -3439,6 +3440,7 @@ _Appears in:_
 | `replicas` _integer_ | Replicas is the desired number of vMCP pod replicas.<br />VirtualMCPServer creates a single Deployment for the vMCP aggregator process,<br />so there is only one replicas field (unlike MCPServer which has separate<br />Replicas and BackendReplicas for its two Deployments).<br />When nil, the operator does not set Deployment.Spec.Replicas, leaving replica<br />management to an HPA or other external controller. |  | Minimum: 0 <br />Optional: \{\} <br /> |
 | `sessionStorage` _[api.v1beta1.SessionStorageConfig](#apiv1beta1sessionstorageconfig)_ | SessionStorage configures session storage for stateful horizontal scaling.<br />When nil, no session storage is configured. |  | Optional: \{\} <br /> |
 | `imagePullSecrets` _[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#localobjectreference-v1-core) array_ | ImagePullSecrets allows specifying image pull secrets for the vMCP workload.<br />These are applied to both the vMCP Deployment's PodSpec.ImagePullSecrets<br />and to the operator-managed ServiceAccount the vMCP server runs as, so private<br />images are pullable through either path.<br />Merge semantics with PodTemplateSpec:<br />The deployed PodSpec.ImagePullSecrets is the Kubernetes-native strategic-merge<br />union of this field and spec.podTemplateSpec.spec.imagePullSecrets, merged by<br />the patchStrategy:"merge" / patchMergeKey:"name" tags on corev1.PodSpec.<br />  - This field is rendered first as the controller-generated default.<br />  - spec.podTemplateSpec.spec.imagePullSecrets is then strategic-merge-patched<br />    on top, keyed by Name. Distinct names from the two sources are unioned in<br />    the resulting list; entries with the same Name are deduplicated and the<br />    PodTemplateSpec entry wins on overlap (user override).<br />  - Order in the resulting list is not guaranteed and should not be relied on:<br />    strategic merge by name is order-insensitive.<br />  - The operator-managed ServiceAccount's imagePullSecrets list is populated<br />    ONLY from this field. spec.podTemplateSpec.spec.imagePullSecrets does not<br />    reach the ServiceAccount because PodTemplateSpec has no notion of a<br />    ServiceAccount. To make a secret usable via the ServiceAccount path<br />    (e.g. for sidecars or init containers that pull images independently),<br />    list it here rather than under spec.podTemplateSpec.<br />Note on cross-CRD consistency:<br />MCPRegistry currently uses an atomic-replace strategy for its imagePullSecrets<br />(the user-provided value replaces the controller-generated list rather than<br />being merged on top). 
VirtualMCPServer follows the Kubernetes-native<br />strategic-merge-by-name behavior described above. Aligning the two is tracked<br />as a separate follow-up; until then, manifests that set imagePullSecrets on<br />both CRDs will see different override behavior between them. |  | Optional: \{\} <br /> |
+| `rateLimiting` _[api.v1beta1.RateLimitConfig](#apiv1beta1ratelimitconfig)_ | RateLimiting defines rate limiting configuration for the Virtual MCP server.<br />Requires Redis session storage to be configured for distributed rate limiting. |  | Optional: \{\} <br /> |
 
 
 #### api.v1beta1.VirtualMCPServerStatus
diff --git a/pkg/ratelimit/internal/bucket/bucket.go b/pkg/ratelimit/internal/bucket/bucket.go
index 28903bcdcd..d68a1709c8 100644
--- a/pkg/ratelimit/internal/bucket/bucket.go
+++ b/pkg/ratelimit/internal/bucket/bucket.go
@@ -90,7 +90,7 @@ type TokenBucket struct {
 }
 
 // New creates a TokenBucket. The Redis key is derived from namespace, server
-// name, and suffix (e.g., "global" or "global:tool:search").
+// name, and suffix (e.g., "shared" or "shared:tool:search").
 func New(namespace, serverName, suffix string, maxTokens int32, refillPeriod time.Duration) *TokenBucket {
 	refillSec := refillPeriod.Seconds()
 	return &TokenBucket{
diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go
index a007191d3c..147e22cd7e 100644
--- a/pkg/ratelimit/limiter_test.go
+++ b/pkg/ratelimit/limiter_test.go
@@ -70,7 +70,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) {
 	assert.Contains(t, err.Error(), "refillPeriod must be positive")
 }
 
-func TestLimiter_ServerGlobalExhausted(t *testing.T) {
+func TestLimiter_ServerSharedExhausted(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
 	ctx := t.Context()
@@ -93,6 +93,35 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) {
 	assert.Greater(t, d.RetryAfter, time.Duration(0))
 }
 
+func TestLimiter_SharedUsesRedisKeys(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+	ctx := t.Context()
+	// Purpose: pin the Redis key names used by shared buckets (server-level and per-tool).
+	crd := &v1beta1.RateLimitConfig{
+		Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+		Tools: []v1beta1.ToolRateLimitConfig{
+			{
+				Name:   "search",
+				Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+			},
+		},
+	}
+	l, err := NewLimiter(client, "ns", "srv", crd)
+	require.NoError(t, err)
+	// A single allowed call for tool "search" is expected to create both bucket keys.
+	d, err := l.Allow(ctx, "search", "")
+	require.NoError(t, err)
+	require.True(t, d.Allowed)
+	// Expected key names: "shared" suffix for the server bucket, "shared:tool:<name>" for the tool.
+	serverKey := "thv:rl:{ns:srv}:shared"
+	toolKey := "thv:rl:{ns:srv}:shared:tool:search"
+	// Exists reports how many of the listed keys are present; both must exist after Allow.
+	exists, err := client.Exists(ctx, serverKey, toolKey).Result()
+	require.NoError(t, err)
+	assert.Equal(t, int64(2), exists)
+}
+
 func TestLimiter_PerToolIsolation(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go
index 12cebf47d2..61a24c86be 100644
--- a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go
+++ b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go
@@ -10,6 +10,7 @@ import (
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
@@ -460,9 +461,30 @@ var _ = Describe("VirtualMCPServer Circuit Breaker Lifecycle", Ordered, func() {
 		backend.Spec.Image = images.YardstickServerImage
 		Expect(k8sClient.Update(ctx, backend)).To(Succeed())
 
+		By("Waiting for backend StatefulSet template to use the fixed image")
+		Eventually(func() error {
+			sts := &appsv1.StatefulSet{}
+			if err := k8sClient.Get(ctx, types.NamespacedName{
+				Name:      backend2Name,
+				Namespace: testNamespace,
+			}, sts); err != nil {
+				return err
+			}
+			for _, container := range sts.Spec.Template.Spec.Containers {
+				if container.Name == "mcp" {
+					if container.Image != images.YardstickServerImage {
+						return fmt.Errorf("statefulset still has image %q", container.Image)
+					}
+					return nil
+				}
+			}
+			return fmt.Errorf("mcp container not found in statefulset template")
+		}, timeout, pollingInterval).Should(Succeed())
+
 		By("Deleting stuck pods to force recreation with fixed image")
 		// Pods in ImagePullBackOff don't automatically recreate when image is fixed
-		// Delete them to force the statefulset to create new pods with the correct image
+		// Delete them after the statefulset template is updated, otherwise the old template
+		// can immediately recreate the pod with the broken image again.
 		podList := &corev1.PodList{}
 		Expect(k8sClient.List(ctx, podList,
 			client.InNamespace(testNamespace),