diff --git a/inference/a3mega/deepseek-r1-671b/vllm-serving-gke/values.yaml b/inference/a3mega/deepseek-r1-671b/vllm-serving-gke/values.yaml
index dd8f5a56..992b436d 100644
--- a/inference/a3mega/deepseek-r1-671b/vllm-serving-gke/values.yaml
+++ b/inference/a3mega/deepseek-r1-671b/vllm-serving-gke/values.yaml
@@ -48,7 +48,7 @@ gpuPlatformSettings:
 network:
   ncclSettings:
   - name: NCCL_DEBUG
-    value: "VERSION"
+    value: "TRACE"
   subnetworks[]:
 
 vllm:
diff --git a/inference/a3mega/llama-4/vllm-serving-gke/response.json b/inference/a3mega/llama-4/vllm-serving-gke/response.json
new file mode 100644
index 00000000..eacfe635
--- /dev/null
+++ b/inference/a3mega/llama-4/vllm-serving-gke/response.json
@@ -0,0 +1 @@
+{"object":"error","message":"The model `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8` does not exist.","type":"NotFoundError","param":null,"code":404}
\ No newline at end of file
diff --git a/inference/a3mega/llama-4/vllm-serving-gke/values.yaml b/inference/a3mega/llama-4/vllm-serving-gke/values.yaml
index 579c0992..3ed100ea 100644
--- a/inference/a3mega/llama-4/vllm-serving-gke/values.yaml
+++ b/inference/a3mega/llama-4/vllm-serving-gke/values.yaml
@@ -41,14 +41,14 @@ volumes:
 
 gpuPlatformSettings:
   useHostPlugin: false
-  ncclPluginImage: "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/nccl-plugin-gpudirecttcpx-dev:v1.0.8-1"
-  rxdmImage: "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/tcpgpudmarxd-dev:v1.0.14"
-  ncclBuildType: 223
+  ncclPluginImage: "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/nccl-plugin-gpudirecttcpx-dev:v1.0.15"
+  rxdmImage: "us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpxo/tcpgpudmarxd-dev:v1.0.21"
+  ncclBuildType: 228
 
 network:
   ncclSettings:
   - name: NCCL_DEBUG
-    value: "VERSION"
+    value: "TRACE"
   subnetworks[]:
 
 vllm:
@@ -58,4 +58,4 @@ vllm:
     ports:
       http: 8000
   serverArgs:
-    max-model-len: 32768
\ No newline at end of file
+    max-model-len: 32768
diff --git a/src/helm-charts/a3mega/vllm-inference/multi-host/templates/lws-deployment.yaml b/src/helm-charts/a3mega/vllm-inference/multi-host/templates/lws-deployment.yaml
index 12c7f628..50da2fb3 100644
--- a/src/helm-charts/a3mega/vllm-inference/multi-host/templates/lws-deployment.yaml
+++ b/src/helm-charts/a3mega/vllm-inference/multi-host/templates/lws-deployment.yaml
@@ -115,7 +115,7 @@ spec:
           imagePullPolicy: Always
           volumeMounts:
             - name: nccl-plugin-volume
-              mountPath: /usr/local/nccl-plugin
+              mountPath: /usr/local/tcpxo
           env:
           - name: BUILD_TYPE
             value: "{{ $root.Values.gpuPlatformSettings.ncclBuildType }}"
@@ -126,7 +126,7 @@ spec:
             set -ex
             chmod 755 /scripts/container_entry.sh
             /scripts/container_entry.sh install --install-nccl --nccl-buildtype ${BUILD_TYPE}
-            cp -r /var/lib/tcpxo/* /usr/local/nccl-plugin/
+            cp -r /var/lib/tcpxo/* /usr/local/tcpxo/
 
         {{- end }}
 
@@ -206,13 +206,13 @@ spec:
                 value: /usr/local/nvidia/lib64
               {{- else }}
               - name: LD_LIBRARY_PATH
-                value: /usr/local/nccl-plugin/lib64:/usr/local/nvidia/lib64
+                value: /usr/local/tcpxo/lib64:/usr/local/nvidia/lib64
               - name: NCCL_LIB_DIR
-                value: /usr/local/nccl-plugin/lib64
+                value: /usr/local/tcpxo/lib64
               {{- end }}
               - name: NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY
                 value: /dev/aperture_devices
 
               # NCCL settings from A3Mega configuration
               - name: NCCL_FASTRAK_CTRL_DEV
                 value: "eth0"
@@ -223,7 +223,7 @@ spec:
               - name: NCCL_ALGO
                 value: "Ring,Tree"
               - name: NCCL_PROTO
-                value: "Simple"
+                value: "Simple,LL128"
               - name: NCCL_MIN_NCHANNELS
                 value: "4"
               - name: NCCL_DYNAMIC_CHUNK_SIZE
@@ -253,17 +253,19 @@ spec:
               - name: NCCL_TUNER_PLUGIN
                 value: "libnccl-tuner.so"
               - name: NCCL_TUNER_CONFIG_PATH
-                value: "/usr/local/nccl-plugin/lib64/a3plus_tuner_config.textproto"
+                value: "/usr/local/tcpxo/lib64/a3plus_tuner_config.textproto"
               - name: NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE
-                value: "/usr/local/nccl-plugin/lib64/a3plus_guest_config.textproto"
+                value: "/usr/local/tcpxo/lib64/a3plus_guest_config.textproto"
               - name: NCCL_NVLS_ENABLE
-                value: "0"
+                value: "1"
               - name: NCCL_FASTRAK_PLUGIN_ACCEPT_TIMEOUT_MS
                 value: "600000"
               - name: CUDA_VISIBLE_DEVICES
                 value: "0,1,2,3,4,5,6,7"
               - name: NCCL_FASTRAK_IFNAME
                 value: "eth1,eth2,eth3,eth4,eth5,eth6,eth7,eth8"
+              - name: NCCL_NVLSTREE_MAX_CHUNKSIZE
+                value: "131072"
 
               # The following is needed to prevent send-receive stalling execution
               - name: NVTE_FWD_LAYERNORM_SM_MARGIN
@@ -275,6 +277,14 @@ spec:
               - name: NCCL_P2P_PXN_LEVEL
                 value: "0"
 
+              # GPUViz
+              - name: NCCL_NET_PLUGIN_TELEMETRY_MODE
+                value: "1"
+              - name: NCCL_GPUVIZ_ENABLE_MILLISECOND_BANDWIDTH_OUTPUT
+                value: "1"
+              - name: NCCL_GPUVIZ_FILE_ROTATION_INTERVAL_IN_SECONDS
+                value: "300"
+
               {{- range $environment_variable := $root.Values.network.ncclSettings }}
               - name: {{ $environment_variable.name }}
                 value: "{{ $environment_variable.value }}"
@@ -321,7 +331,7 @@ spec:
               mountPath: /dev/aperture_devices
             {{- if not $root.Values.gpuPlatformSettings.useHostPlugin }}
             - name: nccl-plugin-volume
-              mountPath: /usr/local/nccl-plugin
+              mountPath: /usr/local/tcpxo
             {{- end }}
             - name: sys
               mountPath: /hostsysfs
@@ -430,7 +440,7 @@ spec:
           imagePullPolicy: Always
           volumeMounts:
             - name: nccl-plugin-volume
-              mountPath: /usr/local/nccl-plugin
+              mountPath: /usr/local/tcpxo
           env:
           - name: BUILD_TYPE
             value: "{{ $root.Values.gpuPlatformSettings.ncclBuildType }}"
@@ -441,7 +451,7 @@ spec:
             set -ex
             chmod 755 /scripts/container_entry.sh
             /scripts/container_entry.sh install --install-nccl --nccl-buildtype ${BUILD_TYPE}
-            cp -r /var/lib/tcpxo/* /usr/local/nccl-plugin/
+            cp -r /var/lib/tcpxo/* /usr/local/tcpxo/
 
         {{- end }}
 
@@ -522,9 +532,9 @@ spec:
                 value: /usr/local/nvidia/lib64
               {{- else }}
               - name: LD_LIBRARY_PATH
-                value: /usr/local/nccl-plugin/lib64:/usr/local/nvidia/lib64
+                value: /usr/local/tcpxo/lib64:/usr/local/nvidia/lib64
               - name: NCCL_LIB_DIR
-                value: /usr/local/nccl-plugin/lib64
+                value: /usr/local/tcpxo/lib64
               {{- end }}
               - name: NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY
                 value: /dev/aperture_devices
@@ -569,9 +579,9 @@ spec:
               - name: NCCL_TUNER_PLUGIN
                 value: "libnccl-tuner.so"
               - name: NCCL_TUNER_CONFIG_PATH
-                value: "/usr/local/nccl-plugin/lib64/a3plus_tuner_config.textproto"
+                value: "/usr/local/tcpxo/lib64/a3plus_tuner_config.textproto"
               - name: NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE
-                value: "/usr/local/nccl-plugin/lib64/a3plus_guest_config.textproto"
+                value: "/usr/local/tcpxo/lib64/a3plus_guest_config.textproto"
               - name: NCCL_NVLS_ENABLE
-                value: "0"
+                value: "1"
               - name: NCCL_FASTRAK_PLUGIN_ACCEPT_TIMEOUT_MS
@@ -611,7 +621,7 @@ spec:
               mountPath: /dev/aperture_devices
             {{- if not $root.Values.gpuPlatformSettings.useHostPlugin }}
             - name: nccl-plugin-volume
-              mountPath: /usr/local/nccl-plugin
+              mountPath: /usr/local/tcpxo
             {{- end }}
             - name: sys
               mountPath: /hostsysfs
diff --git a/src/helm-charts/a3mega/vllm-inference/single-host/templates/model-serve-launcher.yaml b/src/helm-charts/a3mega/vllm-inference/single-host/templates/model-serve-launcher.yaml
index 18ccbc33..ff411250 100644
--- a/src/helm-charts/a3mega/vllm-inference/single-host/templates/model-serve-launcher.yaml
+++ b/src/helm-charts/a3mega/vllm-inference/single-host/templates/model-serve-launcher.yaml
@@ -119,7 +119,7 @@ spec:
         imagePullPolicy: Always
         volumeMounts:
           - name: nccl-plugin-volume
-            mountPath: /usr/local/nccl-plugin
+            mountPath: /usr/local/tcpxo
         env:
         - name: BUILD_TYPE
           value: "{{ $root.Values.gpuPlatformSettings.ncclBuildType }}"
@@ -130,7 +130,7 @@ spec:
           set -ex
           chmod 755 /scripts/container_entry.sh
           /scripts/container_entry.sh install --install-nccl --nccl-buildtype ${BUILD_TYPE}
-          cp -r /var/lib/tcpxo/* /usr/local/nccl-plugin/
+          cp -r /var/lib/tcpxo/* /usr/local/tcpxo/
 
       {{- end }}
 
@@ -203,9 +203,9 @@ spec:
               value: /usr/local/nvidia/lib64
             {{- else }}
             - name: LD_LIBRARY_PATH
-              value: /usr/local/nccl-plugin/lib64:/usr/local/nvidia/lib64
+              value: /usr/local/tcpxo/lib64:/usr/local/nvidia/lib64
             - name: NCCL_LIB_DIR
-              value: /usr/local/nccl-plugin/lib64
+              value: /usr/local/tcpxo/lib64
             {{- end }}
             - name: NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY
               value: /dev/aperture_devices
@@ -220,7 +220,7 @@ spec:
             - name: NCCL_ALGO
               value: "Ring,Tree"
             - name: NCCL_PROTO
-              value: "Simple"
+              value: "Simple,LL128"
             - name: NCCL_MIN_NCHANNELS
               value: "4"
             - name: NCCL_DYNAMIC_CHUNK_SIZE
@@ -250,17 +250,19 @@ spec:
             - name: NCCL_TUNER_PLUGIN
               value: "libnccl-tuner.so"
             - name: NCCL_TUNER_CONFIG_PATH
-              value: "/usr/local/nccl-plugin/lib64/a3plus_tuner_config.textproto"
+              value: "/usr/local/tcpxo/lib64/a3plus_tuner_config.textproto"
             - name: NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE
-              value: "/usr/local/nccl-plugin/lib64/a3plus_guest_config.textproto"
+              value: "/usr/local/tcpxo/lib64/a3plus_guest_config.textproto"
             - name: NCCL_NVLS_ENABLE
-              value: "0"
+              value: "1"
             - name: NCCL_FASTRAK_PLUGIN_ACCEPT_TIMEOUT_MS
               value: "600000"
             - name: CUDA_VISIBLE_DEVICES
               value: "0,1,2,3,4,5,6,7"
             - name: NCCL_FASTRAK_IFNAME
               value: "eth1,eth2,eth3,eth4,eth5,eth6,eth7,eth8"
+            - name: NCCL_NVLSTREE_MAX_CHUNKSIZE
+              value: "131072"
 
             # The following is needed to prevent send-receive stalling execution
             - name: NVTE_FWD_LAYERNORM_SM_MARGIN
@@ -272,6 +274,14 @@ spec:
             - name: NCCL_P2P_PXN_LEVEL
               value: "0"
 
+            # GPUViz
+            - name: NCCL_NET_PLUGIN_TELEMETRY_MODE
+              value: "1"
+            - name: NCCL_GPUVIZ_ENABLE_MILLISECOND_BANDWIDTH_OUTPUT
+              value: "1"
+            - name: NCCL_GPUVIZ_FILE_ROTATION_INTERVAL_IN_SECONDS
+              value: "300"
+
             {{- range $environment_variable := $root.Values.network.ncclSettings }}
             - name: {{ $environment_variable.name }}
               value: "{{ $environment_variable.value }}"
@@ -311,7 +321,7 @@ spec:
             mountPath: /dev/aperture_devices
           {{- if not $root.Values.gpuPlatformSettings.useHostPlugin }}
           - name: nccl-plugin-volume
-            mountPath: /usr/local/nccl-plugin
+            mountPath: /usr/local/tcpxo
           {{- end }}
           - name: sys
             mountPath: /hostsysfs