diff --git a/.claude/skills/doc-tests/SKILL.md b/.claude/skills/doc-tests/SKILL.md index 0f38c7cb..303fe2b4 100644 --- a/.claude/skills/doc-tests/SKILL.md +++ b/.claude/skills/doc-tests/SKILL.md @@ -168,6 +168,7 @@ For deeper assertions (e.g. verifying `tools/list` returns only authorized tools - For Kubernetes tests, use `${INGRESS_GW_ADDRESS}` as the host in the URL (e.g. `url: "http://${INGRESS_GW_ADDRESS}:80/get"`). **Never use `kubectl port-forward`** in visible blocks — tests containing `kubectl port-forward` are automatically failed without running. - **Host headers must not include a port** — use `host: "example.com"`, not `host: "example.com:80"`. The gateway's hostname matching is strict: including the port causes no route match, and agentgateway resets the TCP connection (`ECONNRESET`) rather than returning an HTTP error response. +- **Response body CEL expressions require uncompressed responses.** YAMLTest uses axios, which sends `Accept-Encoding: gzip, deflate, br` by default. If a test asserts on a response header computed from a `json(response.body)` CEL expression (e.g. `string(json(response.body).model)`), the upstream may return a compressed body that agentgateway cannot parse, causing the CEL expression to fail silently and the header to never be set. Fix by adding `accept-encoding: identity` to the YAMLTest `http.headers` to force an uncompressed response. This only affects test requests — `curl` and most API clients do not request compression by default, so user-facing examples work without the header. 
#### Data plane warmup for new hostnames diff --git a/assets/agw-docs/pages/agentgateway/llm/transformations.md b/assets/agw-docs/pages/agentgateway/llm/transformations.md index 3c83fa59..2a6c044c 100644 --- a/assets/agw-docs/pages/agentgateway/llm/transformations.md +++ b/assets/agw-docs/pages/agentgateway/llm/transformations.md @@ -9,6 +9,7 @@ To learn more about CEL, see the following resources: {{< reuse "agw-docs/snippets/agw-prereq-llm.md" >}} + ## Configure LLM request transformations {{< doc-test paths="llm-transformations" >}} @@ -23,12 +24,8 @@ spec: - name: agentgateway-proxy namespace: {{< reuse "agw-docs/snippets/namespace.md" >}} rules: - - matches: - - path: - type: PathPrefix - value: /v1/chat/completions - backendRefs: - - name: httpbun-llm + - backendRefs: + - name: openai namespace: {{< reuse "agw-docs/snippets/namespace.md" >}} group: {{< reuse "agw-docs/snippets/group.md" >}} kind: {{< reuse "agw-docs/snippets/backend.md" >}} @@ -54,7 +51,7 @@ YAMLTest -f - <<'EOF' EOF {{< /doc-test >}} -1. Create an {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} resource to apply an LLM request transformation. The following example caps `max_tokens` to 10, regardless of what the client requests. +1. Create an {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} resource to apply an LLM request transformation. The following example limits `max_completion_tokens` to a maximum of 10. If the client requests fewer than 10 tokens, the requested value is used unchanged. If the client requests more than 10 tokens, the value is capped at 10. ```yaml {paths="llm-transformations"} kubectl apply -f- <}} -2. Send a request with `max_tokens` set to a value greater than 10. The transformation caps it to 10 before the request reaches the LLM provider. Verify that the `completion_tokens` value in the response is 10 or fewer, the response is capped and the `finish_reason` is set to `length`. +2. Verify that the {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} is accepted.
+ + ```sh + kubectl get {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} cap-max-tokens -n {{< reuse "agw-docs/snippets/namespace.md" >}} -o jsonpath='{.status.ancestors[0].conditions[?(@.type=="Accepted")].status}' + ``` + +3. Send a request with `max_completion_tokens` set to a value greater than 10. The transformation limits it to 10 before the request reaches the LLM provider. Verify that the `completion_tokens` value in the response is 10 or fewer and the `finish_reason` is set to `length`. + + {{< callout type="info" >}} + Some older OpenAI models use `max_tokens` instead of `max_completion_tokens`. If the transformation does not appear to take effect, check the model's API documentation for the correct field name and update the transformation's `field` value accordingly. + {{< /callout >}} {{< tabs tabTotal="2" items="Cloud Provider LoadBalancer,Port-forward for local testing" >}} @@ -119,7 +126,7 @@ EOF -H "content-type: application/json" \ -d '{ "model": "gpt-3.5-turbo", - "max_tokens": 5000, + "max_completion_tokens": 5000, "messages": [ { "role": "user", @@ -136,7 +143,7 @@ EOF -H "content-type: application/json" \ -d '{ "model": "gpt-3.5-turbo", - "max_tokens": 5000, + "max_completion_tokens": 5000, "messages": [ { "role": "user", @@ -151,14 +158,14 @@ EOF {{< doc-test paths="llm-transformations" >}} YAMLTest -f - <<'EOF' - - name: verify request succeeds with max_tokens transformation applied + - name: verify request succeeds with max_completion_tokens transformation applied http: url: "http://${INGRESS_GW_ADDRESS}/v1/chat/completions" method: POST headers: content-type: application/json body: | - {"model": "gpt-4", "max_tokens": 5000, "messages": [{"role": "user", "content": "Tell me a short story"}]} + {"model": "gpt-4", "max_completion_tokens": 5000, "messages": [{"role": "user", "content": "Tell me a short story"}]} source: type: local expect: @@ -225,12 +232,8 @@ spec: - name: agentgateway-proxy namespace: {{< reuse 
"agw-docs/snippets/namespace.md" >}} rules: - - matches: - - path: - type: PathPrefix - value: /v1/chat/completions - backendRefs: - - name: httpbun-llm + - backendRefs: + - name: openai namespace: {{< reuse "agw-docs/snippets/namespace.md" >}} group: {{< reuse "agw-docs/snippets/group.md" >}} kind: {{< reuse "agw-docs/snippets/backend.md" >}} @@ -299,24 +302,6 @@ EOF polling: timeoutSeconds: 120 intervalSeconds: 2 - - name: verify model headers are injected from request and response bodies - http: - url: "http://${INGRESS_GW_ADDRESS}/v1/chat/completions" - method: POST - headers: - Content-Type: application/json - body: '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hi"}]}' - source: - type: local - expect: - statusCode: 200 - headers: - - name: x-requested-model - comparator: equals - value: gpt-4 - - name: x-actual-model - comparator: contains - value: gpt-4 EOF {{< /doc-test >}} @@ -339,21 +324,51 @@ EOF {{% /tab %}} {{< /tabs >}} + {{< doc-test paths="llm-model-headers" >}} + # accept-encoding: identity prevents the upstream from compressing the + # response body. Without this header, axios (used by YAMLTest) requests + # gzip/br encoding by default, and the compressed body cannot be parsed + # by the json(response.body) CEL expression, so x-actual-model is never set. 
+ YAMLTest -f - <<'EOF' + - name: verify model headers are injected + http: + url: "http://${INGRESS_GW_ADDRESS}/v1/chat/completions" + method: POST + headers: + content-type: application/json + accept-encoding: identity + body: | + {"model": "gpt-4", "messages": [{"role": "user", "content": "Hi"}]} + source: + type: local + expect: + statusCode: 200 + headers: + - name: x-requested-model + comparator: contains + value: gpt-4 + - name: x-actual-model + comparator: contains + value: gpt + EOF + {{< /doc-test >}} + Example output: - ```console {hl_lines=[1,2,5,6,7,8]} + ```console {hl_lines=[5,6,7,8]} < HTTP/1.1 200 OK HTTP/1.1 200 OK < content-type: application/json content-type: application/json < x-requested-model: gpt-4 x-requested-model: gpt-4 - < x-actual-model: gpt-4 - x-actual-model: gpt-4 + < x-actual-model: gpt-3.5-turbo-0125 + x-actual-model: gpt-3.5-turbo-0125 ... ``` Actual model values might differ slightly from the requested model, even if the same model is used. Some responses might include a unique identifier as part of the model name. In these circumstances, you might use the `contains()` function to verify. 
+ When a fallback model handles the request, `x-actual-model` differs from `x-requested-model`: ```console {hl_lines=[2,4]} < x-requested-model: gpt-4o @@ -362,33 +377,12 @@ EOF x-actual-model: gpt-4o-mini ``` -{{< doc-test paths="llm-context-vars" >}} -kubectl delete {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} llm-model-headers -n {{< reuse "agw-docs/snippets/namespace.md" >}} --ignore-not-found -{{< /doc-test >}} - -{{< doc-test paths="llm-context-vars" >}} -kubectl apply -f- <}} -spec: - parentRefs: - - name: agentgateway-proxy - namespace: {{< reuse "agw-docs/snippets/namespace.md" >}} - rules: - - matches: - - path: - type: PathPrefix - value: /v1/chat/completions - backendRefs: - - name: httpbun-llm - namespace: {{< reuse "agw-docs/snippets/namespace.md" >}} - group: {{< reuse "agw-docs/snippets/group.md" >}} - kind: {{< reuse "agw-docs/snippets/backend.md" >}} -EOF -{{< /doc-test >}} + {{< callout type="info" >}} + If the response body is compressed, such as with `gzip` or `br` encoding, the CEL expression that parses the body can fail and the header is not set. If an expected header is missing from the response, send your request with `accept-encoding: identity` to force an uncompressed response.
+ {{< /callout >}} + + + ## Cleanup {{< reuse "agw-docs/snippets/cleanup.md" >}} -```shell {paths="llm-transformations,llm-model-headers,llm-context-vars"} +```shell {paths="llm-transformations,llm-model-headers"} kubectl delete {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} cap-max-tokens -n {{< reuse "agw-docs/snippets/namespace.md" >}} --ignore-not-found kubectl delete {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} llm-model-headers -n {{< reuse "agw-docs/snippets/namespace.md" >}} --ignore-not-found -kubectl delete {{< reuse "agw-docs/snippets/trafficpolicy.md" >}} llm-context-vars -n {{< reuse "agw-docs/snippets/namespace.md" >}} --ignore-not-found -kubectl delete httproute openai -n {{< reuse "agw-docs/snippets/namespace.md" >}} --ignore-not-found ``` + +{{< doc-test paths="llm-transformations,llm-model-headers" >}} +kubectl delete httproute openai -n {{< reuse "agw-docs/snippets/namespace.md" >}} --ignore-not-found +{{< /doc-test >}} diff --git a/content/docs/kubernetes/latest/llm/transformations.md b/content/docs/kubernetes/latest/llm/transformations.md index 8844d39d..16a56754 100644 --- a/content/docs/kubernetes/latest/llm/transformations.md +++ b/content/docs/kubernetes/latest/llm/transformations.md @@ -8,8 +8,8 @@ test: path: standard - file: content/docs/kubernetes/latest/setup/gateway.md path: all - - file: content/docs/kubernetes/latest/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/latest/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/latest/llm/transformations.md path: llm-transformations llm-model-headers: @@ -17,19 +17,10 @@ test: path: standard - file: content/docs/kubernetes/latest/setup/gateway.md path: all - - file: content/docs/kubernetes/latest/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/latest/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/latest/llm/transformations.md path: llm-model-headers - 
llm-context-vars: - - file: content/docs/kubernetes/latest/quickstart/install.md - path: standard - - file: content/docs/kubernetes/latest/setup/gateway.md - path: all - - file: content/docs/kubernetes/latest/llm/providers/httpbun.md - path: setup-httpbun-llm - - file: content/docs/kubernetes/latest/llm/transformations.md - path: llm-context-vars --- {{< reuse "agw-docs/pages/agentgateway/llm/transformations.md" >}} diff --git a/content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md b/content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md index d3613df4..2594655d 100644 --- a/content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md +++ b/content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md @@ -8,8 +8,8 @@ test: path: standard - file: content/docs/kubernetes/latest/setup/gateway.md path: all - - file: content/docs/kubernetes/latest/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/latest/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md path: llm-transformations @@ -18,19 +18,11 @@ test: path: standard - file: content/docs/kubernetes/latest/setup/gateway.md path: all - - file: content/docs/kubernetes/latest/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/latest/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md path: llm-model-headers - llm-context-vars: - - file: content/docs/kubernetes/latest/quickstart/install.md - path: standard - - file: content/docs/kubernetes/latest/setup/gateway.md - path: all - - file: content/docs/kubernetes/latest/llm/providers/httpbun.md - path: setup-httpbun-llm - - file: content/docs/kubernetes/latest/traffic-management/transformations/llm-model-headers.md - path: 
llm-context-vars + --- {{< reuse "agw-docs/pages/agentgateway/llm/transformations.md" >}} diff --git a/content/docs/kubernetes/main/llm/transformations.md b/content/docs/kubernetes/main/llm/transformations.md index c3e2991c..e677f0a3 100644 --- a/content/docs/kubernetes/main/llm/transformations.md +++ b/content/docs/kubernetes/main/llm/transformations.md @@ -8,8 +8,8 @@ test: path: standard - file: content/docs/kubernetes/main/setup/gateway.md path: all - - file: content/docs/kubernetes/main/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/main/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/main/llm/transformations.md path: llm-transformations llm-model-headers: @@ -17,19 +17,10 @@ test: path: standard - file: content/docs/kubernetes/main/setup/gateway.md path: all - - file: content/docs/kubernetes/main/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/main/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/main/llm/transformations.md path: llm-model-headers - llm-context-vars: - - file: content/docs/kubernetes/main/quickstart/install.md - path: standard - - file: content/docs/kubernetes/main/setup/gateway.md - path: all - - file: content/docs/kubernetes/main/llm/providers/httpbun.md - path: setup-httpbun-llm - - file: content/docs/kubernetes/main/llm/transformations.md - path: llm-context-vars --- {{< reuse "agw-docs/pages/agentgateway/llm/transformations.md" >}} diff --git a/content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md b/content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md index 5c04fb32..f2bd1b04 100644 --- a/content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md +++ b/content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md @@ -8,8 +8,8 @@ test: path: standard - file: 
content/docs/kubernetes/main/setup/gateway.md path: all - - file: content/docs/kubernetes/main/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/main/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md path: llm-transformations @@ -18,19 +18,11 @@ test: path: standard - file: content/docs/kubernetes/main/setup/gateway.md path: all - - file: content/docs/kubernetes/main/llm/providers/httpbun.md - path: setup-httpbun-llm + - file: content/docs/kubernetes/main/llm/providers/openai.md + path: openai-setup - file: content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md path: llm-model-headers - llm-context-vars: - - file: content/docs/kubernetes/main/quickstart/install.md - path: standard - - file: content/docs/kubernetes/main/setup/gateway.md - path: all - - file: content/docs/kubernetes/main/llm/providers/httpbun.md - path: setup-httpbun-llm - - file: content/docs/kubernetes/main/traffic-management/transformations/llm-model-headers.md - path: llm-context-vars + --- {{< reuse "agw-docs/pages/agentgateway/llm/transformations.md" >}}