From b45d36874a9e9a3bf990eb9d5c4f37b70f063cd7 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 09:27:55 -0400
Subject: [PATCH 01/40] Specify task

---
 .task/AGENTS.md    |   3 +
 .task/saacogs.json | 703 +++++++++++++++++++++++++++++++++++++++++++
 .task/task.md      | 723 +++++++++++++++++++++++++++++++++++++++++++++
 AGENTS.md          |   1 +
 CLAUDE.md          |   1 +
 5 files changed, 1431 insertions(+)
 create mode 100644 .task/AGENTS.md
 create mode 100644 .task/saacogs.json
 create mode 100644 .task/task.md
 create mode 120000 AGENTS.md
 create mode 120000 CLAUDE.md

diff --git a/.task/AGENTS.md b/.task/AGENTS.md
new file mode 100644
index 00000000..ff9f7032
--- /dev/null
+++ b/.task/AGENTS.md
@@ -0,0 +1,3 @@
+At the start of the conversation, please output the following so that I know you've read these instructions:
+
+📖 omes:saa-cogs
diff --git a/.task/saacogs.json b/.task/saacogs.json
new file mode 100644
index 00000000..8654452f
--- /dev/null
+++ b/.task/saacogs.json
@@ -0,0 +1,703 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "links": [],
+  "panels": [
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
+      "id": 1,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"frontend-.*\"})",
+          "legendFormat": "frontend",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"})",
+          "legendFormat": "history",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"matching-.*\"})",
+          "legendFormat": "matching",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "CPU per service (vCPU)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          },
+          "unit": "decbytes"
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
+      "id": 2,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"frontend\",workload_type=\"deployment\"}))",
+          "legendFormat": "frontend",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "legendFormat": "history",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"matching\",workload_type=\"deployment\"}))",
+          "legendFormat": "matching",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "Memory per service (p50 working set)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 8 },
+      "id": 3,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"frontend\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Frontend RPC by method",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 8 },
+      "id": 4,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "History RPC by method",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 8 },
+      "id": 5,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"matching\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Matching RPC by method",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
+      "id": 6,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (table)(rate(cassandra_query{cluster=\"$cluster\",verb!=\"select\"}[$__rate_interval]))",
+          "legendFormat": "query: {{table}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (table)(rate(cassandra_batch{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "batch: {{table}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Astra writes by table",
+      "description": "Validate r_Cass = 3/7 for writes. cassandra_query filtered to verb!=select; cassandra_batch is always writes.",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
+      "id": 7,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (table)(rate(cassandra_query{cluster=\"$cluster\",verb=\"select\"}[$__rate_interval]))",
+          "legendFormat": "{{table}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Astra reads by table",
+      "description": "Reads are not expected to differ much between SAW and SAA (similar caching, ~1 read on creation).",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
+      "id": 8,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (walType)(rate(wal_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "{{walType}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "WAL operation rate by type",
+      "description": "Covers both reads and writes (no separate write-only metric). Expect HISTORY_EVENT_WAL activity for SAW only; both use MUTABLE_STATE_WAL.",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
+      "id": 9,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(visibility_persistence_requests{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Visibility persistence rate by operation",
+      "description": "OSS visibility_persistence_requests counter, tagged by operation (RecordWorkflowExecutionStarted, RecordWorkflowExecutionClosed, UpsertWorkflowExecution, DeleteWorkflowExecution).",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 32 },
+      "id": 10,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(rate(syncmatch_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "sync match",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(rate(asyncmatch_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "async match",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Sync vs async match rate",
+      "description": "Health check on experimental conditions. Async match means tasks went through persistence/backlog rather than being dispatched directly to a waiting poller.",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 40,
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": { "text": "prod", "value": "prod" },
+        "name": "env",
+        "options": [
+          { "selected": true, "text": "prod", "value": "prod" },
+          { "selected": false, "text": "dev", "value": "test" }
+        ],
+        "query": "prod : prod, dev : test",
+        "type": "custom"
+      },
+      {
+        "current": { "text": "prod thanos", "value": "af7fe237-211e-413e-9723-41a73886bcbb" },
+        "hide": 2,
+        "includeAll": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "${env:text}.*",
+        "type": "datasource"
+      },
+      {
+        "current": {},
+        "datasource": { "type": "prometheus", "uid": "${datasource}" },
+        "definition": "label_values(restarts,cluster)",
+        "includeAll": false,
+        "label": "Cluster",
+        "name": "cluster",
+        "options": [],
+        "query": { "query": "label_values(restarts,cluster)", "refId": "StandardVariableQuery" },
+        "refresh": 2,
+        "regex": "",
+        "type": "query"
+      }
+    ]
+  },
+  "time": { "from": "now-3h", "to": "now" },
+  "timepicker": {},
+  "timezone": "utc",
+  "title": "SAA COGS",
+  "uid": "saacogs",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/.task/task.md b/.task/task.md
new file mode 100644
index 00000000..3e9d1e26
--- /dev/null
+++ b/.task/task.md
@@ -0,0 +1,723 @@
+For background context, please study the following documents carefully:
+
+START_DOCUMENT------------------------------------------------------------------------------
+# Temporal Activity Execution & saas-temporal Cloud Persistence: Implementation Overview
+
+## Part 1: Activity Execution Models in Temporal Server
+
+### 1.1 CHASM Standalone Activities (`chasm/lib/activity/`)
+
+CHASM standalone activities are first-class, independently-scheduled executions outside workflow context. They use **mutable state only** -- no history events.
+
+#### State Machine
+
+States defined in `chasm/lib/activity/proto/v1/activity_state.proto`:
+
+```
+UNSPECIFIED
+  → SCHEDULED
+    → STARTED
+      → COMPLETED (terminal)
+      → FAILED (terminal)
+      → CANCEL_REQUESTED → CANCELED (terminal)
+      → TIMED_OUT (terminal)
+      → TERMINATED (terminal)
+    → CANCEL_REQUESTED → CANCELED (terminal)
+    → TIMED_OUT (terminal)
+    → TERMINATED (terminal)
+    → SCHEDULED (retry path)
+```
+
+Lifecycle states (`activity.go:95-107`):
+- `LifecycleStateRunning`: SCHEDULED, STARTED, CANCEL_REQUESTED
+- `LifecycleStateCompleted`: COMPLETED
+- `LifecycleStateFailed`: FAILED, TERMINATED, TIMED_OUT, CANCELED
+
+#### State Transitions (`statemachine.go`)
+
+| Transition | From | To | Trigger |
+|---|---|---|---|
+| TransitionScheduled (37-77) | UNSPECIFIED | SCHEDULED | Initial scheduling |
+| TransitionRescheduled (87-127) | STARTED | SCHEDULED | Retry after failure |
+| TransitionStarted (130-169) | SCHEDULED | STARTED | Worker accepts task |
+| TransitionCompleted (177-202) | STARTED/CANCEL_REQUESTED | COMPLETED | Worker completes |
+| TransitionFailed (210-237) | STARTED/CANCEL_REQUESTED | FAILED | Non-retryable failure |
+| TransitionCancelRequested (278-295) | STARTED/SCHEDULED | CANCEL_REQUESTED | Cancel API called |
+| TransitionCanceled (304-331) | CANCEL_REQUESTED | CANCELED | Worker acknowledges cancel |
+| TransitionTerminated (246-275) | SCHEDULED/STARTED/CANCEL_REQUESTED | TERMINATED | Terminate API called |
+| TransitionTimedOut (340-374) | SCHEDULED/STARTED/CANCEL_REQUESTED | TIMED_OUT | Timer task fires |
+
+#### Mutable State Structures
+
+**ActivityState** (proto):
+- `activity_type`, `task_queue`, timeouts (`schedule_to_close`, `schedule_to_start`, `start_to_close`, `heartbeat`), `retry_policy`, `status`, `schedule_time`, `priority`, `cancel_state`, `terminate_state`
+
+**Activity Go Component** (`activity.go:52-68`):
+- `ActivityState` (embedded proto)
+- `Visibility: chasm.Field[*chasm.Visibility]` -- search attributes
+- `LastAttempt: chasm.Field[*ActivityAttemptState]` -- attempt count, stamp, started_time, failure details, worker identity
+- `LastHeartbeat: chasm.Field[*ActivityHeartbeatState]` -- heartbeat details and recorded_time
+- `RequestData: chasm.Field[*ActivityRequestData]` -- input, header, user_metadata
+- `Outcome: chasm.Field[*ActivityOutcome]` -- successful (output) or failed (failure)
+- `Store: chasm.ParentPtr[ActivityStore]` -- parent workflow (nil for standalone)
+
+#### Task Flow
+
+1. **Scheduling** (`handler.go:51-104`): `StartActivityExecution()` → creates Activity → applies TransitionScheduled
+2. **Dispatch** (`activity_tasks.go:21-79`): `activityDispatchTaskExecutor` pushes to matching service via `AddActivityTask()`
+3. **Start** (`activity.go:173-191`): `HandleStarted()` applies TransitionStarted, schedules start-to-close and heartbeat timeout tasks
+4. **Completion** (`activity.go:259-280`): `HandleCompleted()` applies TransitionCompleted
+5. **Failure** (`activity.go:284-323`): `HandleFailed()` checks retryability → either `tryReschedule()` or TransitionFailed
+6. **Heartbeat** (`activity.go:559-586`): Updates LastHeartbeat, reschedules heartbeat timeout task
+
+#### Timeout Tasks
+
+- **ScheduleToStartTimeoutTask** (`activity_tasks.go:81-116`): Non-retryable → TIMED_OUT
+- **ScheduleToCloseTimeoutTask** (`activity_tasks.go:118-150`): Non-retryable → TIMED_OUT
+- **StartToCloseTimeoutTask** (`activity_tasks.go:152-198`): Attempts retry via `tryReschedule()`; if not retryable → TIMED_OUT
+- **HeartbeatTimeoutTask** (`activity_tasks.go:200-276`): Validates heartbeat recency; attempts retry; if not retryable → TIMED_OUT
+
+#### Retry Logic
+
+- `shouldRetry()` (`activity.go:504-514`): Checks TransitionRescheduled possible, attempt < max, enough time remaining
+- `hasEnoughTimeForRetry()` (`activity.go:518-534`): Exponential backoff calculation against schedule-to-close deadline
+- `tryReschedule()` (`activity.go:489-502`): Applies TransitionRescheduled (increments attempt, schedules dispatch with backoff)
+
+#### Cancellation
+
+- `RequestCancelActivityExecution` (`handler.go:273-296`): Applies TransitionCancelRequested
+  - If SCHEDULED: immediately applies TransitionCanceled (`activity.go:414-433`)
+  - If STARTED: stays CANCEL_REQUESTED; worker receives cancellation on next interaction
+
+---
+
+### 1.2 Legacy Workflow Activities
+
+Activities executed as part of a workflow use **mutable state (ActivityInfo) plus history events**.
+
+#### History Events
+
+```
+EVENT_TYPE_ACTIVITY_TASK_SCHEDULED (10)
+EVENT_TYPE_ACTIVITY_TASK_STARTED (11)
+EVENT_TYPE_ACTIVITY_TASK_COMPLETED (12)
+EVENT_TYPE_ACTIVITY_TASK_FAILED (13)
+EVENT_TYPE_ACTIVITY_TASK_TIMED_OUT (14)
+EVENT_TYPE_ACTIVITY_TASK_CANCEL_REQUESTED (15)
+EVENT_TYPE_ACTIVITY_TASK_CANCELED (16)
+```
+
+#### ActivityInfo Mutable State (`persistence/v1/executions.proto:524-661`)
+
+Core: `activity_id`, `activity_type`, `task_queue`, `scheduled_time`, `started_time`, `started_event_id`, `scheduled_event_id`
+
+Timeouts: `schedule_to_close_timeout`, `schedule_to_start_timeout`, `start_to_close_timeout`, `heartbeat_timeout`
+
+Retry: `attempt`, `has_retry_policy`, `retry_initial_interval`, `retry_maximum_interval`, `retry_maximum_attempts`, `retry_backoff_coefficient`, `retry_expiration_time`, `retry_non_retryable_error_types`, `retry_last_failure`
+
+State flags: `cancel_requested`, `cancel_request_id`, `timer_task_status` (bit flags), `stamp`, `paused`, `pause_info`
+
+#### Pending Activity States (`activity.go:53-61`)
+
+- SCHEDULED: `StartedEventId == 0`
+- STARTED: `StartedEventId != 0 && !CancelRequested`
+- CANCEL_REQUESTED: `CancelRequested`
+- PAUSED: `Paused && Scheduled`
+- PAUSE_REQUESTED: `Paused && Started`
+
+#### Timer Task Status Flags
+
+```go
+TimerTaskStatusCreatedScheduleToStart = 1
+TimerTaskStatusCreatedScheduleToClose = 2
+TimerTaskStatusCreatedStartToClose    = 4
+TimerTaskStatusCreatedHeartbeat       = 8
+```
+
+#### Pause/Unpause/Reset (unique to legacy model)
+
+- **Pause** (`activity.go:254-284`): Sets `paused = true`, increments stamp if SCHEDULED
+- **Unpause** (`activity.go:388-425`): Clears pause, regenerates retry task if SCHEDULED
+- **Reset** (`activity.go:286-379`): Resets attempt to 1, optionally resets heartbeat/options
+
+#### API Handlers (`service/history/api/`)
+
+- `recordactivitytaskstarted/api.go`: Creates ActivityTaskStartedEvent
+- `respondactivitytaskcompleted/api.go`: Creates ActivityTaskCompletedEvent
+- `respondactivitytaskfailed/api.go`: Retry or ActivityTaskFailedEvent
+- `respondactivitytaskcanceled/api.go`: Creates ActivityTaskCanceledEvent
+- `recordactivitytaskheartbeat/api.go`: Updates heartbeat state, reschedules timeout
+
+---
+
+### 1.3 Activity Metrics (Both Models)
+
+Defined in `common/metrics/metric_defs.go`. Both models emit the same metric names.
+
+**Counters:**
+| Metric | Description |
+|---|---|
+| `activity_success` | Successful completions (excludes retries) |
+| `activity_fail` | Final failures (retries exhausted) |
+| `activity_task_fail` | Per-attempt failures (includes retries) |
+| `activity_cancel` | Canceled activities |
+| `activity_terminate` | Terminated activities (CHASM only) |
+| `activity_timeout` | Terminal timeouts |
+| `activity_task_timeout` | Per-timeout events (includes retries) |
+
+**Timers:**
+| Metric | Description |
+|---|---|
+| `activity_start_to_close_latency` | StartedTime → completion/failure/timeout |
+| `activity_schedule_to_close_latency` | ScheduleTime → completion/failure/timeout/cancel |
+
+**Tags:** `namespace`, `task_queue_family`, `operation`, `activity_type`, `versioning_behavior`, `workflow_type` (set to `__temporal_standalone_activity__` for CHASM). Timeout metrics additionally tagged with `timeout_type` (SCHEDULE_TO_START, SCHEDULE_TO_CLOSE, START_TO_CLOSE, HEARTBEAT).
+
+**Metric enrichment** (`activity.go:804-824`): `enrichMetricsHandler()` adds per-task-queue-family scoping via `metrics.GetPerTaskQueueFamilyScope()`.
+
+---
+
+### 1.4 Key Differences
+
+| Aspect | CHASM Standalone | Legacy Workflow |
+|---|---|---|
+| Persistence | Mutable state only | Mutable state + history events |
+| Parent context | Standalone execution | Part of workflow execution |
+| State tracking | ActivityState + sub-components | ActivityInfo in workflow |
+| Task dispatch | Direct to matching service | Via workflow task completion |
+| Completion storage | Outcome field | History events |
+| Cancellation | Explicit CANCEL_REQUESTED state | Boolean flag in ActivityInfo |
+| Pause support | Not yet implemented | Full (pause, unpause, reset) |
+| Search attributes | Visibility component (chasm) | Workflow search attributes |
+
+---
+
+## Part 2: saas-temporal Cloud Integration
+
+### 2.1 Architecture Overview
+
+saas-temporal wraps the Temporal server to run in Temporal Cloud cells by replacing core persistence with Cloud Data Storage (CDS), backed by:
+- **Datastax Astra Cassandra** for durable storage
+- **Write-Ahead Logs (WALs)** for durability before Cassandra persistence
+- **OpenSearch/Elasticsearch** for workflow visibility
+- **Tiered Storage** (S3/GCS/Azure) for history archival
+
+### 2.2 Entry Point and Server Construction
+
+**Main:** `cmd/temporal-service/main.go`
+
+The `start` command:
+1. Loads OSS Temporal configuration from YAML
+2. Injects secrets (Astra, Elasticsearch credentials)
+3. Sets up dynamic configuration
+4. Optionally enables cloud metrics handler (Chronicle)
+5. Configures authorization (SaaS Auth0 JWT + Temporal JWT)
+6. Configures custom datastore with CDS
+7. Creates server via `cds.NewServer()`
+
+**Server creation:** `cds/export/cds/server.go`:
+```go
+func NewServer(serviceFxOpts FxOptions, opts ...temporal.ServerOption) (temporal.Server, error) {
+    return newServerFx(TopLevelModule, serviceFxOpts, opts...)
+}
+```
+
+Uses Uber FX dependency injection with modules for persistence factory, dynamic config, serialization, and per-service modules (history, matching, frontend, worker).
+
+### 2.3 CDS Factory Architecture (`cds/export/cds/factory.go`)
+
+**FactoryProvider** (lines 51-65): Implements `client.AbstractDataStoreFactory`
+- `NumberOfShards`, `OrderedDatastoreConfigs` (shards → datastores)
+- `HistoryDatastoreConfigs` (weighted distribution)
+- `WALFollowerProviders` for WAL followers
+- `Clock`, `DynamicConfig`, `ChasmRegistry`
+
+**Factory**: Manages three WAL pools:
+- **MS WAL** (MutableState): Records mutable state mutations
+- **HE WAL** (HistoryEvent): Records history events
+- **LP WAL** (LargePayload): Records oversized payloads
+
+Plus store providers: `MultiDBStoreProvider` for ordinal datastores, separate history store provider with tiered storage, optional Walker integration.
+
+### 2.4 Astra Cassandra Integration (`cds/storage/cassandra/astra/`)
+
+**Session creation** (`gocql.go`): Wraps gocql with Astra-specific config (TLS, connection pooling, retry policies) via Datastax `gocql-astra`.
+
+**Query instrumentation** (`gocql_metrics.go:48-100`): `queryMetricsObserver` instruments every query, using a 150-entry LRU statement cache.
+
+**Cassandra Metrics:**
+| Metric | Description |
+|---|---|
+| `CassandraConns` | Connection count |
+| `CassandraQueryTotalLatency` | Query latency |
+| `CassandraBatchTotalLatency` | Batch latency |
+| `CassandraQuery` | Query count |
+| `CassandraBytesTx` / `CassandraBytesRx` | Network bytes |
+| `CassandraLargeResponse` / `CassandraLargeRequest` | Large payload detection |
+| `CassandraRetries` | Retry histogram |
+| `CassandraErrors` | Error counters |
+
+Tags: `OperationType` (INSERT/UPDATE/DELETE/SELECT), `TableName`, `CasTag` (CAS operation)
+
+### 2.5 Write-Ahead Logs (`cds/export/wal/`, `cds/stream/`)
+
+WALs provide durability guarantees before data reaches Cassandra.
+
+**WAL Client Interface** (`cds/export/wal/crud.go`):
+```go
+WriteMS(), WriteHE(), WriteLP()  // Write operations per pool
+ReadMS(), ReadHE(), ReadLP()     // Read operations per pool
+```
+
+**Configuration** (`cds/config/configs.go:46-140`):
+- Rate limiting: `WALReadsRate`, `WALReadsBurst`
+- Timeouts: `WALDialTimeout`, `WALReadTimeout`, `WALWriteTimeout`
+- Ledger rotation: `WALLedgerRotationBytesThreshold`, `WALLedgerRotationAgeThreshold`
+- Retention: `WALLedgerLifetime`
+- Parallelism: `WALMaxParallelReads`
+- Feature flags: `WALReadV2Enabled`, `WALV2EncodingEnabled`
+
+**WAL Metrics** (`cds/metrics/metrics.go:34-56`):
+| Metric | Description |
+|---|---|
+| `wal_latency` | Operation latency |
+| `wal_stream_dial_attempt/success/error` | Connection establishment |
+| `wal_stream_dns_latency` | DNS resolution |
+| `wal_stream_connect_latency` | TCP connect |
+| `wal_stream_handshake_latency` | TLS handshake |
+| `wal_stream_send/receive_latency` | I/O latency |
+| `wal_health_check_failed_count` | Connection health |
+| `wal_write_timeout_count` | Timeout tracking |
+| `wal_reader_page_latency` | Page read latency |
+| `wal_entries_per_read` | Batch size histogram |
+| `wal_compression_count` | Compression events |
+
+**Flush Metrics** (lines 13-27):
+| Metric | Description |
+|---|---|
+| `flush_latency` | Time to flush to persistence |
+| `flush_error` | Flush failures |
+| `flush_snapshot_aborts` | Snapshot abort count |
+| `flush_persistence_behindness_bytes/count/time` | Persistence lag |
+| `flush_time_since_last_persist` | Staleness |
+| `flush_reason_count` | Flush trigger reasons (by namespace) |
+
+**Recovery Metrics** (lines 57-70):
+| Metric | Description |
+|---|---|
+| `recovery_total_latency` | Full recovery duration |
+| `recovery_open_reader_latency` | Snapshot reader open |
+| `recovery_rate_limiter_latency` | Rate limiting delay |
+| `recovery_first_read_latency/bytes` | Initial WAL read |
+| `recovery_takeover_latency` | Takeover phase |
+| `recovery_wal_update_latency` | WAL update during recovery |
+
+**Ledger Metrics** (lines 77-82):
+| Metric | Description |
+|---|---|
+| `ledger_rotation_count` | Rotations |
+| `logs_per_ledger` | Logs per ledger histogram |
+| `segments_per_shard` | Segments per shard histogram |
+| `segment_too_old_count` | GC candidates |
+| `active_segment_too_old_count` | Rotation delay |
+
+### 2.6 Execution Store Wrapper (`cds/export/cds/execution_store.go`)
+
+Wraps the Cassandra execution store to:
+- Convert mutable state mutations to WAL records (`NewMSWALRecord()`)
+- Convert history events to WAL records (`NewHEWALRecord()`)
+- Calculate storage metering
+- Manage snapshot trimming
+- Implement history event caching
+
+Implements `persistence.ExecutionStore` and `persistence.ShardStore`.
+
+### 2.7 How Activity State Flows Through CDS
+
+**CHASM activities**: Activity mutable state → MS WAL write → Cassandra persistence. No HE WAL involvement (no history events). State transitions are persisted as mutable state mutations via the execution store wrapper.
+
+**Legacy workflow activities**: ActivityInfo mutable state → MS WAL write → Cassandra. History events (Scheduled, Started, Completed, etc.) → HE WAL write → Cassandra. Both paths go through the execution store wrapper's WAL record conversion.
+
+### 2.8 OpenSearch/Elasticsearch Visibility (`visibility/`)
+
+**Factory:** `visibility/factory.go` -- `VisibilityStoreFactory` creates visibility stores configured per cloud cell.
+
+**Batch processor metrics** (`visibility/common/metrics_defs.go`):
+| Metric | Description |
+|---|---|
+| `visibility_batch_processor_request_add_latency` | Enqueue time |
+| `visibility_batch_processor_request_latency` | Total request latency |
+| `visibility_batch_processor_request_errors` | Failed requests |
+| `visibility_batch_processor_commit_latency` | Batch commit time |
+| `visibility_batch_processor_batch_size` | Items per batch histogram |
+| `visibility_batch_processor_batch_requests` | Requests per batch histogram |
+| `visibility_batch_processor_queued_requests` | Queue depth histogram |
+| `visibility_batch_processor_corrupted_data` | Data integrity failures |
+| `visibility_batch_processor_duplicate_request` | Deduplication events |
+
+### 2.9 Tiered Storage (`cds/persistence/tieredstorage/`)
+
+Long-term history archival to cloud object stores:
+- S3 (AWS): `s3_store.go`
+- GCS (Google Cloud): `gcs_store.go`
+- Azure Blob: `azure_client.go`
+
+Interface: `Upload()`, `Read()`, `Delete()`, `List()`, `PluginName()`
+
+Metrics: `ReadWorkflowHistory`, `UploadWorkflowHistory`, `DeleteWorkflowHistory`, `ListTieredStorageObjects`
+
+### 2.10 Persistence Store Metrics (`cds/persistence/metrics/defs.go`)
+
+**Store layer** (lines 70-85):
+| Metric | Description |
+|---|---|
+| `store_requests` | Request count by operation |
+| `store_latency` | Operation latency |
+| `store_errors` | Errors: shard_exists, shard_ownership_lost, condition_failed, timeout, unavailable |
+
+**Manager layer** (lines 89-102):
+| Metric | Description |
+|---|---|
+| `saas_persistence_requests` | High-level request count |
+| `saas_persistence_latency` | High-level latency |
+| `saas_persistence_errors` | Error tracking |
+
+Tags: `operation` (CreateShard, UpdateShard, GetWorkflowExecution, etc.), `component`, `cass_cluster`
+
+### 2.11 Cloud Metrics Infrastructure
+
+**Handler chain** (`cloudmetricshandler/delegating_recorders.go`):
+1. `allowlistedRecorder`: Filters through allowlist
+2. `multiRecorder`: Sends to multiple backends
+
+**Chronicle integration** (`cloudmetricshandler/chronicle_recorder.go`):
+- Enabled by `TEMPORAL_ENABLE_CLOUDMETRICSHANDLER`
+- Config: `/etc/temporal/cloudmetricshandler`
+- Kubernetes enrichment: pod name, namespace, labels
+- Backends: S3 writer, HTTP writer (to Chronicle service)
+- Batch config: 50K queue, 25K batch, 100ms flush
+
+**Action metering** (`actionmetering/metrics.go`):
+- `billable_action_count` with tags: namespace, action_type, workflow_type, workflow_task_queue
+- Activity type/task queue currently placeholder `"_unknown_"` with TODOs for standalone activity support
+
+### 2.12 Additional Cloud Features
+
+- **Authorization**: SaaS Auth0 JWT + Temporal JWT, TLS client certs
+- **Quotas/Flow Control** (`quotas/`, `flowcontrol/`): Request-level and task-queue quotas
+- **Multi-region replication** (`cds/service/history/replication/`): Custom replication filters
+- **Metering V3**: S3/GCS/Azure bucket metering
+- **SMS (etcd)**: Secondary Metadata Store for namespace/cluster metadata
+- **Dynamic config**: 150+ hot-reloadable properties (`cds/config/configs.go`)
+END_DOCUMENT--------------------------------------------------------------------------------------
+
+START_DOCUMENT------------------------------------------------------------------------------
+# Standalone Activity COGS and margins
+
+@Dan Davison March 17, 2026
+
+We want to ensure that we are billing in a way that meets our target margins for new product features in cloud, such as new CHASM execution types. To do this, we need to know certain things about COGS (cost of goods sold) for these features. This document outlines how to estimate COGS for Standalone Activity relative to Workflow and the implications of this for margins.
+
+# Motivation: avoiding cannibalization
+
+We have rules (see [temporalio/action](https://github.com/temporalio/action)) specifying how customer operations map to billable Actions. For example, suppose a customer executes a Workflow that executes a single Activity, which succeeds on first attempt without heartbeating. This incurs 2 Actions (StartWorkflow and ScheduleActivity). We’ll call this a “Single Activity Workflow” (SAW).
+
+We haven’t yet decided how we will bill for Standalone Activity (SAA). But suppose that we decide that executing a single SAA (no retries, no heartbeating) is 1 Action (StartStandaloneActivity).
+
+If we want SAA margins to match SAW margins, then we want the COGS of SAA (no retries, no heartbeating) to be ≤ 1/2 that of SAW (because we get half as much revenue for the SAA). If it is not, then there would be some degree of cannibalization (customers switch their single-activity workloads to SAA, but our margins there are worse). We’d hope it would be offset by increased volume, but we’d still prefer SAA margins to match SAW.
+
+### What about retries and heartbeating?
+
+SAW (no retries and no heartbeating) is 2 Actions. If the activity retries once it becomes 3 Actions (ScheduleActivity now happens twice); if it heartbeats once during each attempt it becomes 5 Actions.
+
+Let’s assume (as we currently intend) that we apply the same billing rules to Standalone Activity retries and heartbeating. Then, as long as SAA is not worse than Workflow Activity with respect to COGS of retries and heartbeating, our margins from those customer operations will be at least as good under SAA as when they are done in the context of a pre-CHASM workflow. CHASM has been designed for efficiency; we have reason to be optimistic that it’s not *worse* than the legacy workflow activity implementation.
+
+# Problem statement
+
+The above suggests that we should focus on estimating the ratio of COGS for Standalone Activity (SAA) relative to Single-activity Workflow (SAW) in the no retries, no heartbeating case:
+
+$$
+R = \frac{C_{SAA}}{C_{SAW}}.
+$$
+
+We expect $R < 1$ because SAA achieves execution of an activity with fewer RPCs, persistence operations, etc, than SAW. We are hoping that it is less than 1/2 since then our SAA margins are as good or better than our workflow margins, assuming we bill 1 Action for SAA.
+
+# Estimating the COGS ratio
+
+We’ll assume that the COGS for a SAA or SAW execution results solely from invoices from third parties relating to cloud compute resources. COGS for an execution type (SAA or SAW) is the sum of price ($p$) times quantity consumed ($q$) over all resources:
+
+$$
+C = \sum_{i} p_i q_i.
+$$
+
+We want the COGS ratio $R$. We can write that as a weighted average of per-resource usage ratios:
+
+$$
+R = \frac{C_{SAA}}{C_{SAW}} = \sum_i f_i r_i.
+$$
+
+This allows us to calculate $R$ as a function of two things that we can estimate:
+
+- $f_i = p_i q_{i}(SAW) / \sum_j p_j q_{j}(SAW)$ is the fraction of SAW COGS attributable to resource $i$ (“spend share”). We’ll use our current cloud spend for this.
+- $r_i = q_i(SAA) / q_i(SAW)$ is the per-resource usage ratio. We will estimate these by comparing the implementations or by running experiments in cloud cells.
+
+The resources ($i$) potentially include:
+1. Data egress
+2. CPU usage
+3. Memory usage
+4. Persistence operations against our WALs
+5. Persistence operations against Astra (to be replaced by Walker)
+6. Persistence operations against OpenSearch (visibility)
+7. Metrics/logs processing and storage costs, Clickhouse
+
+*At-rest data storage is excluded: we bill customers separately for storage on a GB/h basis, so it does not need to be subsidized by Actions. (Tangentially, it’s worth noting that we expect SAA storage to cost users half what they’d pay for SAW since SAW stores the input and output payloads in both workflow scheduled/complete events and activity scheduled/complete events.)*
+
+# Per-resource usage ratios
+
+To proceed, we need to estimate the SAW vs SAA usage ratio ($r_i$) for each resource.
+
+The following table summarizes the two implementations. It describes the simplest possible happy-path scenario: an activity that succeeds on first attempt without heartbeating, via sync matches.
+
+| # | Single-activity Workflow | Standalone Activity |
+| --- | --- | --- |
+| 1 | RPC: `StartWorkflowExecution` => HEWAL, MSWAL; Vis&; Cassandra& | RPC: `StartActivityExecution` => MSWAL; Vis&; Cassandra& |
+| 2 | Task => RPC: `AddWorkflowTask` |  |
+| 3 | RPC: `RecordWorkflowTaskStarted` => HEWAL, MSWAL; Cassandra& |  |
+| 4 | RPC: `RespondWorkflowTaskCompleted` => HEWAL, MSWAL; Cassandra& |  |
+| 5 | Task => RPC: `AddActivityTask` | Task => RPC: `AddActivityTask` |
+| 6 | RPC: `RecordActivityTaskStarted` => HEWAL, MSWAL; Cassandra& | RPC: `RecordActivityTaskStarted` => MSWAL; Cassandra& |
+| 7 | RPC: `RespondActivityTaskCompleted` => HEWAL, MSWAL; Cassandra& | RPC: `RespondActivityTaskCompleted` => MSWAL; Vis&; Cassandra& |
+| 8 | Task => RPC: `AddWorkflowTask` |  |
+| 9 | RPC: `RecordWorkflowTaskStarted` => HEWAL, MSWAL; Cassandra& |  |
+| 10 | RPC: `RespondWorkflowTaskCompleted` => HEWAL, MSWAL; Vis&; Cassandra& |  |
+- `&` indicates a write that’s not on the sync response path
+- `AddWorkflowTask` and `AddActivityTask` involve inter-service RPCs but no persistence writes in the happy path (“sync match”).
+- The table does not show worker poll requests
+- An additional `Vis&` is incurred in both cases when the execution is deleted.
+
+Comparing the implementations in the table gives
+
+$$
+r_{\text{WAL}} = \frac{3}{14} = 0.21,~~~~
+r_{\text{Cass}} = \frac{3}{7} = 0.43,~~~~
+r_{\text{Vis}} = \frac{3}{3} = 1.0.~~~~
+$$
+
+These ratios count writes only. Cassandra reads are not expected to differ much between SAW and SAA, since they use similar caching mechanics, with the result that a high proportion of both SAW and SAA executions incur ~1 read (on execution creation).
+
+In addition, we can estimate data transfer costs by comparing the implementations. These are likely dominated by egress to customer infra (ingress is free on AWS and GCP; data transfers to Astra, OpenSearch, and Grafana are in-VPC or via PrivateLink). Let the activity input and output payload sizes be $S_I$ and $S_O$. Payload egress for SAW is $2S_I + 2S_O$ (input payload sent to workflow and activity workers; output payload sent to workflow worker and client). For SAA this is $S_I + S_O$ since there is no workflow worker detour. This gives
+
+$$
+r_\text{data\_transfer} = 0.5.
+$$
+
+# COGS ratio estimate
+
+Using approximate/preliminary cloud spend share numbers (thanks @Stephen Chan ) we have:
+
+| **Resource** | **Spend share $f_i$ (preliminary)** | **Usage ratio $r_i$** | **Notes** |
+| --- | --- | --- | --- |
+| **Astra writes** | 40% | $\frac{3}{7}$ = 0.43 | SAW does 2 additional writes for each WFT |
+| **Visibility** (OpenSearch) | 20% | $\frac{3}{3}$ = 1.00 | Equal — both SAA and SAW produce exactly ~~2~~ 3 visibility updates |
+| **WAL writes** | 10% | $\frac{3}{14}$ = 0.21 | Half of Astra ratio: SAA writes only to MSWAL, whereas SAW writes to both HEWAL and MSWAL |
+| **EC2 compute** | 10% | ? | Would need cloud cell experiment |
+| **Data transfer** | 10% | $\frac{1}{2}$ = 0.50 | SAW sends payloads via workflow worker round-trip; SAA does not |
+| **Overheads** (incl. Clickhouse) | 10% | ? |  |
+
+This gives the following estimate of the COGS ratio:
+
+$$
+\begin{align*}
+R &=
+\underbrace{0.4 \times 0.43}_{\text{Astra}:~0.17} +
+\underbrace{0.2 \times 1.0}_{\text{Vis}:~0.20} +
+\underbrace{0.1 \times 0.21}_{\text{WAL}:~0.02} +
+\underbrace{0.1 \times 0.50}_{\text{Tx}:~0.05} +
+0.1 \cdot r_\text{compute} + 0.1 \cdot r_\text{overhead} \\\\
+&=
+0.44 + 0.1(r_\text{compute} + r_\text{overhead}).
+\end{align*}
+$$
+
+# Sensitivity analysis
+
+Before thinking about the implications of this for billing and margins, the next steps are:
+
+1. Refine the cloud spend estimates (Cloud Capacity team; does not involve load experiments)
+2. Decide whether we want to do load experiments to estimate $r_\text{compute}$
+3. Decide how we will address $r_\text{overhead}$
+
+For (2) and (3) we can do some initial sensitivity analysis:
+
+SAW does 10 RPCs vs SAA’s 4 (with 7 vs 3 of them doing persistence writes in the sync-match case). If services are CPU-bound then this suggests that $0.4 < r_\text{compute} < 1.0$ might be reasonable.
+
+The other overheads include (per @Stephen Chan ) Clickhouse, observability cells, and Envoy proxies. Since these costs should also scale with RPC count, let’s assume the same bounds: $0.4 < r_\text{overhead} < 1.0$. This gives:
+
+$$
+0.52 \leq R \leq 0.64.
+$$
+
+![image.png](.task/sensitivity.png)
+
+For example, if SAW margins were 70%, SAA margins would be 62% - 69%. This margin reduction would affect at maximum the ~3% of workflows that are SAW.
+
+- COGS ratio to margins conversion formula
+
+     $\text{margin}_{\text{SAA}} = 1 - 2R(1 - \text{margin}_{\text{SAW}})$.
+
+
+# Discussion
+
+- **Visibility limits SAA margins**. Visibility is expensive (20%), but SAA and SAW perform the same number of visibility writes, so it combines a large weight with the worst possible ratio.
+- **(Unfavorable) Over-provisioning would push $R$ up.** The usage ratios above for persistence are derived from write counts, which only translate to cost savings if capacity tracks usage. But e.g. Astra is bought in fixed hardware units (“Astra Classic”). If any resource component is over-provisioned then SAA and SAW would pay the same cost per execution and $r_i \to 1.0$, making SAA margins less attractive relative to workflow.
+- **Cloud spend share**. We could attempt to separate fixed costs and renormalize (see [Next steps](https://www.notion.so/Next-steps-3268fc567738805e82ddd9c1e1d4c9d1?pvs=21)). This would be favorable to SAA margins if it decreases the visibility share, but unfavorable if it decreases Astra share.
+
+    We’re estimating $f_i$ from cloud spend, so we’re assuming that the spend distribution for single-activity workflows would be similar to the spend distribution for the real mix of customer workflows. I suspect this is a reasonable modeling assumption since in both cases the application is performing the same state transitions in response to workflow and activity task processing.
+
+- **(Mixed) Effect of migration to Walker**. Walker replaces Astra with storage that is under our own control, making right-sizing easier. This may mean that the 3/7 write ratio is more fully realized under Walker, moving SAA COGS away from SAW. However, Walker will be cheaper than Astra, so persistence’s share of spend shrinks. Since persistence is where SAA has its largest advantage, this would bring SAA COGS closer to SAW.
+
+    These two effects act in opposite directions and the net result will depend on their relative magnitudes. This suggests that we should monitor COGS calculations as the Walker migration proceeds.
+
+- **(Future) A visibility backend migration would improve SAA margins.** There has been [movement](https://www.notion.so/Visibility-CDS-2a98fc567738807e9ee0f318edc4c16f?pvs=21) toward replacing OpenSearch. As discussed above, any reduction in visibility spend share would make SAA COGS more attractive relative to workflow.
+
+# Conclusion
+
+- [We are planning to bill SAA at 1/2 the price of SAW](https://www.notion.so/PRD-Standalone-Activities-for-durable-job-processing-1ee8fc567738806d8b6fe8e2eeae0fc4?pvs=21). Although there are various assumptions involved, at this point it looks like SAA COGS will be more than 1/2 SAW COGS: the estimated range above is $0.52 \leq R \leq 0.64$. This implies that some degree of cannibalization is likely. The extent of cannibalization would be bounded by the proportion of current workloads that are SAW, which is 3% per @Phil Prasek. It may be offset by volume growth attributable to SAA.
+
+# Next steps
+
+- **Refine cloud spend share estimates.**
+
+    The cloud spend share weights used in this analysis are supposed to be marginal costs. We could attempt to separate marginal vs fixed costs and renormalize our spend share percentages. This would be favorable to SAA margins if it decreases the visibility share, but unfavorable if it decreases Astra share.
+
+- **Investigate any impact of over-provisioning.**
+
+    SAA margins may be less favorable than the calculations suggest if some resources are over-provisioned. See discussion [above](https://www.notion.so/Standalone-Activity-COGS-and-margins-3268fc567738803cb63fd9397ffd351c?pvs=21).
+
+- **Decide whether to do cloud cell experiments**.
+
+    Unlike the other resource categories, we lack any obvious theoretical basis for estimating  $r_\text{compute}$ and $r_\text{overhead}$. Estimating $r_\text{compute}$ via cloud cell experiments would require perhaps one engineer-week.  If this were to show a value close to 0.4 then it would suggest that the upper bound on $R$ is 0.56, as opposed to the current 0.64. This would however still be subject to all the assumptions discussed above. We could also attempt to tighten our estimated bounds on $r_\text{overhead}$ via experiment.
+
+    If we decide to do this, the $r_\text{compute}$ experiment would be something like the following: choose a reference activity (e.g. sleeps for 10s, no heartbeating, never fails) and run SAA and SAW workloads on a cloud cell at a fixed start rate (e.g. 10/s) for a sustained period (e.g. 1hr). Fixing start rate rather than concurrency naturally controls for end-to-end latency differences between SAA and SAW.  $r_\text{cpu}$ and $r_\text{memory}$ can then be estimated from metrics as the ratio of mean utilization above the idle baseline. The analysis will need to decide how to combine them, e.g. based on which is more often limiting; alternatively, using the larger of the two would yield a conservative calculation.
+END_DOCUMENT------------------------------------------------------------------------------
+
+START_DOCUMENT------------------------------------------------------------------------------
+# Test plan for SAA COGS measurement
+
+@Dan Davison March 19, 2026
+
+The [SAA COGS proposal](.task/saa-cogs.md) made an initial estimate of the SAA/SAW COGS ratio based on estimating persistence, visibility, and data transfer usage ratios directly from the implementation. But for compute and overheads we have no analytical estimate. We plan to run an experiment to:
+
+1. Estimate the missing $r_\text{compute}$.
+2. Validate the analytical $r_i$ against observed metrics.
+
+For comparison, the Fairness COGS experiment docs:
+
+- [Test plan](https://www.notion.so/temporalio/Test-plan-for-COGS-measurement-28c8fc56773880169cdcc4087a98ceaf)
+- [Fairness COGS Impact](https://www.notion.so/temporalio/Fairness-COGS-Impact-2c58fc567738808f806cfbf09b771b2c)
+- [Pricing Council doc](https://www.notion.so/temporalio/WIP-Pricing-Council-Fairness-COGS-Impact-2cc8fc56773880dcb3efe435623edd9a)
+
+
+
+
+# Proposed SAA experiment
+
+
+## Workloads
+
+Two workloads, run sequentially on the same cell:
+
+1. **SAW**: execute workflow with one activity (no heartbeat, no retry).
+2. **SAA**: execute standalone activity (no heartbeat, no retry).
+
+## Parameters
+
+**Start rate.** I think that we should fix start rate rather than concurrency, since this naturally controls for end-to-end latency differences between SAA and SAW (i.e. a cell running SAW will see higher load because the concurrency will be higher because the SAW end-to-end latency is higher). The fairness experiment used 4k tasks/s. Is starting 4k executions/s reasonable for us?
+
+**Activity.** Immediate successful return; no heartbeat, no retry. We could compare with a 1s sleep to see if results differ?
+
+**Sync match.** Do one run such that sync match should be 100%, and another tuned such that sync match is lower? Verify sync match from metrics (`syncmatch_latency`, `asyncmatch_latency`)
+
+**Duration and repetitions.** Steady-state load; we need long enough for stable CPU averages. The
+fairness experiment used 6h per scenario but this was maybe because of their more sophisticated
+sinusoidal load design? 1h more than enough for the SAA experiment? ≥2 runs per workload to check
+variance/reproducibility.
+
+## Infrastructure
+
+- Anything special about test cell sizing?
+- Workers should run outside the cell (how did fairness experiment do this?)
+
+## Metrics
+
+Initial dashboard content https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs:
+
+
+- **CPU per service** (frontend, history, matching). `node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate` — a k8s recording rule over cAdvisor container metrics (defined in saas-components prometheus rules).
+- **Memory per service**. `container_memory_working_set_bytes` — also k8s/cAdvisor (defined in saas-components alert rules).
+- **RPC rate by method**, one panel per service (frontend, history, matching). `service_requests` counter ([temporal:common/metrics/metric_defs.go:615](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)), tagged with `operation` (the RPC method name). Recorded by a gRPC server-side interceptor ([telemetry.go:177](https://github.com/temporalio/temporal/blob/main/common/rpc/interceptor/telemetry.go)), so it captures inter-service RPCs (e.g. history→matching `AddActivityTask`).
+- **Astra writes by table**. `cassandra_query` counter with `verb!="select"`, plus `cassandra_batch` counter, both broken down by `table`. Tags include `operation`, `table`, `verb`, `cas` ([saas-temporal:cds/metrics/metrics.go:233,238](https://github.com/temporalio/saas-temporal/blob/main/cds/metrics/metrics.go)).
+- **Astra reads by table**. `cassandra_query` with `verb="select"`, broken down by `table`.
+- **WAL operation rate by type**. `wal_latency_count` ([saas-temporal:cds/metrics/metrics.go:35](https://github.com/temporalio/saas-temporal/blob/main/cds/metrics/metrics.go)) broken down by `walType` label (values: `MUTABLE_STATE_WAL`, `HISTORY_EVENT_WAL`, `LARGE_PAYLOAD_WAL` — see [saas-temporal:cds/common/tag/tag.go:11-24](https://github.com/temporalio/saas-temporal/blob/main/cds/common/tag/tag.go)). Note: this metric covers both reads and writes; there is no separate write-only WAL metric. This is arguably more relevant to COGS since WAL reads also cost something.
+- **Visibility persistence rate by operation**. `visibility_persistence_requests` counter ([temporal:common/metrics/metric_defs.go:1398](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)), tagged with `operation` (values include `RecordWorkflowExecutionStarted`, `RecordWorkflowExecutionClosed`, `UpsertWorkflowExecution`, `DeleteWorkflowExecution` — see [visiblity_manager_metrics.go](https://github.com/temporalio/temporal/blob/main/common/persistence/visibility/visiblity_manager_metrics.go)).
+- **Sync vs async match rate**. `syncmatch_latency_count` and `asyncmatch_latency_count` ([temporal:common/metrics/metric_defs.go:1119-1120](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)).
+
+
+## Load generator (omes)
+
+- Add a new scenario that starts standalone activities directly from the load generator, not from within a workflow.
+- Build the omes Go worker Docker image and deploy it as a pod on k8s, configured to poll the test cell. Do we have implementation we can borrow from the fairness experiment?
+
+
+
+
+<details>
+<summary>Appendix: Comparison with fairness experiment (see commits by David Reiss)</summary>
+
+| | Fairness | SAA |
+|---|---|---|
+| **Treatments** | Same workload, two matcher modes | Two execution types (SAW vs SAA) |
+| **Quantity computed** | $\Delta C / C$ | Ratio $r_i = q_i(\text{SAA}) / q_i(\text{SAW})$ |
+| **Load shape** | Sinusoidal backlog (exercises matcher) | Steady-state at fixed start rate (our model assumes sync match) |
+| **What is measured** | CPU per service, Astra operation rates | CPU per service, memory per service, Astra operation rates by table and verb, WAL write rates, visibility write rates, RPC handling rates per service per method |
+| **Predictions to validate** | None — purely empirical | $r_\text{Cass} = 3/7$, $r_\text{WAL} = 3/14$, $r_\text{Vis} = 3/3$, per-method RPC rates matching proposal table |
+
+Fixed start rate (not fixed task throughput) because SAA and SAW generate different numbers of tasks per execution.
+
+**Question**: what is the incremental COGS of enabling the fairness matcher vs the classic matcher?
+
+**COGS components**: (1) Astra queries (~35% of total COGS), (2) EC2 compute (~9%, split across frontend+matching and history). Ignored: data transfer, Astra storage, non-AWS costs (Clickhouse <3%).
+
+**Setup**: dedicated test cell `s-oss-dnr-faircogs3` (64 partitions). Load generator: Omes Ebb and Flow — sinusoidal activity task backlog. 5 scenarios (classic, fairness with 0/1k/100k keys, priority), each 6 hours. Measured via [dedicated Grafana dashboard](https://grafana.tmprl-internal.cloud/d/df6pldpkiy1vka/faircogs).
+
+**Results**: Astra showed no significant increase. CPU increased up to 23% (frontend) and 36% (history) in the worst case (1k fairness keys). COGS impact: $(0.035 \times 0.23) + (0.057 \times 0.36) = 2.8\%$. Pricing council recommendation: price fairness on value to customer, not COGS.
+
+
+
+
+
+</details>
+
+<details>
+<summary>Appendix: possible experimental outcomes</summary>
+
+- **Analytical predictions confirmed, $R$ in predicted range.** Observed $r_\text{Cass}$, $r_\text{WAL}$, $r_\text{Vis}$, and per-method RPC rates match the analytical derivations. $r_\text{compute}$ lands in $[0.4, 1.0]$, giving $R$ in roughly $0.52$–$0.64$. We present $R$ with a tighter confidence interval than the proposal (because $r_\text{compute}$ is now estimated, not bounded).
+- **$r_\text{compute}$ is low, pushing $R$ toward 0.5.** If $r_\text{compute} \approx 0.4$ and analytical predictions hold, $R \approx 0.52$. Cannibalization is near-zero.
+- **Observed $r_i$ diverge from analytical predictions.** Some assumption is wrong (e.g. sync match doesn't hold at test load, or there are unaccounted persistence writes). We recompute $R$ using observed values and identify which assumption failed and whether it reflects production conditions or a test artifact.
+- **$R$ is higher than predicted.** $R > 0.64$ would mean worse cannibalization than estimated. Options: accept the margin reduction (bounded by ~3% SAW share), adjust billing, or identify engineering work to reduce SAA COGS.
+
+</details>
+
+END_DOCUMENT------------------------------------------------------------------------------
+
+
+Your task is to help me design and build the omes-based tooling that we will use to perform the experiments outlined above to learn about COGS of SAA and SAW. We are in the omes repo; study it carefully. Our work will broadly break into the following phases that we must design holistically:
+
+(1) Add any missing omes functionality that will be needed in order to be able to use omes to generate the SAA and SAW load for the experiments.
+(2) Run the experiments against the cloud cell that Stephen has prepared: its name is s-saa-cogs.
+
+I am not familiar with performing operations against cloud cells, so you will need to research and help me during this. But we have several good resources: study the contents of the 'oncall' and 'runbooks' repos, and also use the /agent-slack skill. You also have Notion and Temporal Docs MCP. Use the more modern 'ct' rather than its alias 'omni'.
+
+Initial grafana dashboard JSON is at .task/saacogs.json.
+
+Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase iin the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the outut. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
new file mode 120000
index 00000000..81744092
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1 @@
+.task/AGENTS.md
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 00000000..81744092
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+.task/AGENTS.md
\ No newline at end of file

From 0e752410cfd58ff18c9d076d5de60cbb83d0343d Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 09:57:14 -0400
Subject: [PATCH 02/40] neo(task): Project description (from
 /Users/dan/worktrees/omes/saa-cogs/omes/.task/

---
 .task/commands.sh |  68 ++++++++++++++++++++
 .task/research.md | 159 ++++++++++++++++++++++++++++++++++++++++++++++
 .task/task.md     |   2 +-
 3 files changed, 228 insertions(+), 1 deletion(-)
 create mode 100644 .task/commands.sh
 create mode 100644 .task/research.md

diff --git a/.task/commands.sh b/.task/commands.sh
new file mode 100644
index 00000000..8829d79c
--- /dev/null
+++ b/.task/commands.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SAA COGS experiment: useful commands
+# Cell: s-saa-cogs
+
+CELL="s-saa-cogs"
+# Namespace TBD — confirm with Stephen. Likely one of:
+#   ${CELL}-marathon.e2e
+#   or a custom namespace
+NS="${CELL}-marathon.e2e"
+HOST="${NS}.tmprl-test.cloud:7233"
+
+# ── Local testing ────────────────────────────────────────────────────────────
+# Start local dev server (with standalone activity support)
+temporal server start-dev --headless --log-level warn
+
+# SAW: single-activity workflow (baseline)
+go run ./cmd run-scenario-with-worker \
+  --scenario saa_cogs_saw --language go \
+  --iterations 10 --max-concurrent 5 --run-id saw-local-1
+
+# SAA: standalone activity (no workflow)
+go run ./cmd run-scenario-with-worker \
+  --scenario saa_cogs_saa --language go \
+  --iterations 10 --max-concurrent 5 --run-id saa-local-1
+
+# Rate-limited run (e.g. 10 executions/s for 5 minutes)
+go run ./cmd run-scenario-with-worker \
+  --scenario saa_cogs_saw --language go \
+  --duration 5m --max-iterations-per-second 10 --max-concurrent 100 --run-id saw-rate-1
+
+# ── Cloud cell operations ────────────────────────────────────────────────────
+# Check cell pods
+ct kubectl --context $CELL get pods -n temporal
+
+# Check namespace
+omni admintools --context $CELL -- temporal operator namespace describe $NS
+
+# ── Running against cloud cell ───────────────────────────────────────────────
+# Requires TLS certs or API key. Two options:
+
+# Option A: mTLS
+# TLS_CERT=path/to/cert.pem
+# TLS_KEY=path/to/key.pem
+# go run ./cmd run-scenario \
+#   --scenario saa_cogs_saw \
+#   --server-address $HOST --namespace $NS \
+#   --tls --tls-cert-path $TLS_CERT --tls-key-path $TLS_KEY \
+#   --do-not-register-search-attributes \
+#   --iterations 1 --run-id saw-cloud-1
+
+# Option B: API key
+# go run ./cmd run-scenario \
+#   --scenario saa_cogs_saw \
+#   --server-address $HOST --namespace $NS \
+#   --tls --auth-header "Bearer $API_KEY" \
+#   --do-not-register-search-attributes \
+#   --iterations 1 --run-id saw-cloud-1
+
+# Worker (separate terminal, same auth flags)
+# go run ./cmd run-worker \
+#   --scenario saa_cogs_saw --language go \
+#   --server-address $HOST --namespace $NS \
+#   --tls --tls-cert-path $TLS_CERT --tls-key-path $TLS_KEY \
+#   --run-id saw-cloud-1
+
+# ── Grafana ──────────────────────────────────────────────────────────────────
+# Dashboard: https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs
+# Set cluster variable to: s-saa-cogs
diff --git a/.task/research.md b/.task/research.md
new file mode 100644
index 00000000..cab82543
--- /dev/null
+++ b/.task/research.md
@@ -0,0 +1,159 @@
+# SAA COGS Experiment: Research & Design
+
+## 1. Current State of Omes
+
+### Architecture
+Omes is a load generation framework for Temporal. Scenarios are Go files in `scenarios/` that
+register via `init()` → `loadgen.MustRegisterScenario()`. The scenario name comes from the
+filename. Execution flows:
+
+1. `run-scenario` command: dials Temporal, runs scenario executor
+2. `run-worker` command: starts a worker (Go/Python/etc) polling a task queue
+3. `run-scenario-with-worker`: runs both together (local development)
+
+### Executor Types
+- `GenericExecutor`: takes a `func(ctx, *Run) error` — most flexible
+- `KitchenSinkExecutor`: wraps `GenericExecutor`, starts kitchen-sink workflows with configurable action sequences
+- `FuzzExecutor`: random action generation
+
+### Existing Standalone Activity Support
+Branch `standalone-activity` (commit `efbbb7f`) adds SAA to the `throughput_stress` scenario as
+an *optional extra activity within a workflow*. The implementation:
+
+1. Proto: `StandaloneActivity` message in `kitchen_sink.proto`
+2. Helper: `StandaloneActivity()` in `loadgen/kitchensink/helpers.go` creates an action
+3. Worker: `ExecuteStandaloneActivity()` in `workers/go/kitchensink/kitchen_sink.go` — called as a
+   *workflow activity* that internally calls `StartActivityExecution` + `PollActivityExecution`
+4. Scenario: enabled via `--option enable-standalone-activity=true`
+
+**Critical observation**: This existing support executes SAA *from within a workflow activity*.
+That is useful for testing SAA functionality but **not** for the COGS experiment. For COGS, we need
+to run SAA directly from the load generator (no workflow involved) so that the only server-side
+work is the standalone activity execution itself.
+
+## 2. What We Need for the COGS Experiment
+
+### Two New Scenarios
+
+**`saa_cogs_saw`** — Single Activity Workflow (the baseline):
+- Each iteration: start a workflow that executes one activity (payload: 256B in, 256B out), then completes
+- This is very close to `workflow_with_single_noop_activity` but with a payload activity
+
+**`saa_cogs_saa`** — Standalone Activity:
+- Each iteration: call `StartActivityExecution` directly from the load generator, then
+  `PollActivityExecution` to wait for the result
+- No workflow involved
+- Same activity (payload: 256B in, 256B out) and task queue
+- **Requires a `GenericExecutor`** since `KitchenSinkExecutor` always starts workflows
+
+Both scenarios must use the same worker (the Go worker with `payload` activity registered).
+
+### Key Design Decisions
+
+1. **Activity type**: `payload` with 256B input, 256B output (matching the COGS analysis)
+2. **No heartbeat, no retry** (matching the COGS analysis; retry max_attempts=1)
+3. **Fixed start rate** (not fixed concurrency) — controls for latency differences
+4. **Same task queue** for both scenarios — ensures same worker setup
+5. **Sync match preferred** — the COGS analysis assumes sync match; verify via metrics
+
+### SAA Load Generator Implementation
+
+The SAA scenario needs to call gRPC APIs directly. Looking at the existing
+`ExecuteStandaloneActivity` in the worker code (`workers/go/kitchensink/kitchen_sink.go:46-120`),
+we have a working reference. The scenario version should:
+
+1. Use `client.WorkflowService()` to get the gRPC client
+2. Call `StartActivityExecution` with the activity config
+3. Call `PollActivityExecution` to wait for completion
+4. This is a `GenericExecutor` with a custom `Execute` function
+
+## 3. Cloud Cell Operations
+
+### Connecting to a Cloud Cell
+
+From `bench-go.mdx`, the namespace format for test cells is `{cellId}-marathon.e2e` and the host
+is `{cellId}-marathon.e2e.tmprl-test.cloud:7233`. For our cell `s-saa-cogs`:
+- Namespace: `s-saa-cogs-marathon.e2e` (to be confirmed — Stephen may have set up differently)
+- Host: `s-saa-cogs-marathon.e2e.tmprl-test.cloud:7233`
+
+Omes connects via:
+```
+--server-address <host:port> --namespace <ns> --tls --tls-cert-path <cert> --tls-key-path <key>
+```
+
+Or with API key auth:
+```
+--server-address <host:port> --namespace <ns> --tls --auth-header "Bearer <api-key>"
+```
+
+### Running omes against a cloud cell
+
+Two options:
+1. **Local**: Run `go run ./cmd run-scenario` and `go run ./cmd run-worker` locally, connecting to
+   the cloud cell via TLS. Simplest for proof-of-concept. Higher latency (network round trip to
+   cloud) but the load generator itself isn't on the critical path for COGS measurement.
+2. **K8s pod**: Deploy omes worker as a pod on the cell's k8s cluster. Lower latency, more
+   realistic. The bench-go runbook shows this is the standard approach. Uses `omni scaffold` with
+   `--benchgo-enabled` or manual deployment.
+
+For initial proof-of-concept: run locally. For the actual experiment: deploy to k8s.
+
+### Grafana Dashboard
+
+The dashboard at `https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs` uses a `$cluster`
+variable. Set `cluster=s-saa-cogs` to point at our cell.
+
+### Cell Setup Verification
+
+Use `ct` / `omni` to verify cell state:
+```sh
+# Check cell status
+ct kubectl --context s-saa-cogs get pods -n temporal
+
+# Check namespace exists
+omni admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e
+```
+
+### Search Attributes
+
+Cloud cells cannot register search attributes via the SDK — they must be registered via the
+control plane. The `--do-not-register-search-attributes` flag exists for this. We should use it,
+and register `OmesExecutionID` separately if needed. For the simple COGS scenarios, we may not
+even need search attributes.
+
+## 4. Implementation Plan
+
+### Phase 1: Minimal Scenarios (omes code changes)
+
+1. Create `scenarios/saa_cogs_saw.go` — SAW scenario using `KitchenSinkExecutor`
+2. Create `scenarios/saa_cogs_saa.go` — SAA scenario using `GenericExecutor` with direct gRPC calls
+3. Both share config: payload size, start rate, duration
+
+### Phase 2: Local Proof-of-Concept
+
+1. Test both scenarios against local Temporal server
+2. Run `go run ./cmd run-scenario-with-worker` for SAW
+3. For SAA: run worker separately, then scenario (since SAA doesn't use workflows but the
+   worker still needs to poll for activity tasks)
+
+### Phase 3: Cloud Cell Connection
+
+1. Obtain credentials for s-saa-cogs cell
+2. Verify dashboard shows idle state
+3. Run a single SAW iteration and observe metrics
+4. Run a single SAA iteration and observe metrics
+
+### Phase 4: Full Experiment
+
+1. Deploy omes worker to cloud cell k8s
+2. Run SAW at target start rate for target duration
+3. Wait for cool-down, collect metrics
+4. Run SAA at same start rate for same duration
+5. Collect and compare metrics
+
+## 5. Open Questions
+
+- What namespace(s) are configured on s-saa-cogs?
+- How do we obtain TLS certs or API keys for the cell? (Check oncall or runbooks repos or search slack)
+- Does the cell have CHASM standalone activities enabled? (Dynamic config flag)
+- Worker deployment: should we use the existing bench-go infrastructure or deploy omes directly?
diff --git a/.task/task.md b/.task/task.md
index 3e9d1e26..0e9dc39d 100644
--- a/.task/task.md
+++ b/.task/task.md
@@ -720,4 +720,4 @@ I am not familiar with performing operations against cloud cells, so you will ne
 
 Initial grafana dashboard JSON is at .task/saacogs.json.
 
-Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase iin the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the outut. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
\ No newline at end of file
+Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase in the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the outut. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
\ No newline at end of file

From 8889ed6a878107dfbcc7d497f8ec427070630ab3 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:03:51 -0400
Subject: [PATCH 03/40] Specify SDK version

---
 .task/task.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.task/task.md b/.task/task.md
index 0e9dc39d..b9c71259 100644
--- a/.task/task.md
+++ b/.task/task.md
@@ -720,4 +720,6 @@ I am not familiar with performing operations against cloud cells, so you will ne
 
 Initial grafana dashboard JSON is at .task/saacogs.json.
 
-Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase in the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the outut. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
\ No newline at end of file
+Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase in the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the outut. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
+
+In the omes work, we must use the latest version of the SDK with Standalone Activity support, such that our code is consistent with what Temporal Docs (use MCP) and the samples-go repo show.
\ No newline at end of file

From 93c14ba789dfefd63f194a96e40908acd2c3d1fa Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:15:11 -0400
Subject: [PATCH 04/40] neo: Project description (from
 /Users/dan/worktrees/omes/saa-cogs/omes/.task/

---
 commands.sh               | 51 ++++++++++++++++++++++++++++++++++
 scenarios/saa_cogs_saa.go | 38 +++++++++++++++++++++++++
 scenarios/saa_cogs_saw.go | 58 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+)
 create mode 100644 commands.sh
 create mode 100644 scenarios/saa_cogs_saa.go
 create mode 100644 scenarios/saa_cogs_saw.go

diff --git a/commands.sh b/commands.sh
new file mode 100644
index 00000000..7951ce07
--- /dev/null
+++ b/commands.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+# SAA COGS experiment — useful commands
+# See .task/plan.md for context.
+
+## --- Local testing (against embedded dev server) ---
+
+# SAW: 5 iterations
+go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go --iterations 5
+
+# SAA: 5 iterations
+go run ./cmd run-scenario-with-worker --scenario saa_cogs_saa --language go --iterations 5
+
+# SAW: sustained 60s at 10 starts/s
+go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go \
+    --duration 60s --max-iterations-per-second 10 --max-concurrent 100
+
+# SAA: sustained 60s at 10 starts/s
+go run ./cmd run-scenario-with-worker --scenario saa_cogs_saa --language go \
+    --duration 60s --max-iterations-per-second 10 --max-concurrent 100
+
+## --- Cloud cell: s-saa-cogs ---
+
+CELL=s-saa-cogs
+NS=${CELL}-marathon.e2e
+HOST=${NS}.tmprl-test.cloud:7233
+
+# Verify cell is alive
+ct kubectl --context $CELL get pods -n temporal
+
+# Check namespace
+omni admintools --context $CELL -- temporal operator namespace describe $NS
+
+# Run worker against cloud cell (in one terminal)
+go run ./cmd run-worker --language go --run-id saa-cogs-test \
+    --server-address $HOST --namespace $NS --tls \
+    --tls-cert-path /tmp/saa-cogs-cert.pem --tls-key-path /tmp/saa-cogs-key.pem
+
+# Run SAW scenario against cloud cell (in another terminal)
+go run ./cmd run-scenario --scenario saa_cogs_saw --run-id saa-cogs-test \
+    --server-address $HOST --namespace $NS --tls \
+    --tls-cert-path /tmp/saa-cogs-cert.pem --tls-key-path /tmp/saa-cogs-key.pem \
+    --iterations 5 --do-not-register-search-attributes
+
+# Run SAA scenario against cloud cell
+go run ./cmd run-scenario --scenario saa_cogs_saa --run-id saa-cogs-test \
+    --server-address $HOST --namespace $NS --tls \
+    --tls-cert-path /tmp/saa-cogs-cert.pem --tls-key-path /tmp/saa-cogs-key.pem \
+    --iterations 5 --do-not-register-search-attributes
+
+# Grafana dashboard
+# https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs?var-cluster=s-saa-cogs
diff --git a/scenarios/saa_cogs_saa.go b/scenarios/saa_cogs_saa.go
new file mode 100644
index 00000000..1ec05a7b
--- /dev/null
+++ b/scenarios/saa_cogs_saa.go
@@ -0,0 +1,38 @@
+package scenarios
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"go.temporal.io/sdk/client"
+	"go.temporal.io/sdk/temporal"
+
+	"github.com/temporalio/omes/loadgen"
+)
+
+func init() {
+	loadgen.MustRegisterScenario(loadgen.Scenario{
+		Description: "SAA for COGS: standalone activity with payload, no workflow.",
+		ExecutorFn: func() loadgen.Executor {
+			return &loadgen.GenericExecutor{
+				Execute: executeSAA,
+			}
+		},
+	})
+}
+
+func executeSAA(ctx context.Context, run *loadgen.Run) error {
+	inputData := make([]byte, 256)
+	handle, err := run.Client.ExecuteActivity(ctx, client.StartActivityOptions{
+		ID:                     fmt.Sprintf("a-%s-%s-%d", run.RunID, run.ExecutionID, run.Iteration),
+		TaskQueue:              run.TaskQueue(),
+		ScheduleToCloseTimeout: 60 * time.Second,
+		RetryPolicy:            &temporal.RetryPolicy{MaximumAttempts: 1},
+	}, "payload", inputData, int32(256))
+	if err != nil {
+		return fmt.Errorf("failed to start standalone activity: %w", err)
+	}
+	var result []byte
+	return handle.Get(ctx, &result)
+}
diff --git a/scenarios/saa_cogs_saw.go b/scenarios/saa_cogs_saw.go
new file mode 100644
index 00000000..ef4e92d8
--- /dev/null
+++ b/scenarios/saa_cogs_saw.go
@@ -0,0 +1,58 @@
+package scenarios
+
+import (
+	"time"
+
+	"go.temporal.io/api/common/v1"
+	"google.golang.org/protobuf/types/known/durationpb"
+
+	"github.com/temporalio/omes/loadgen"
+	"github.com/temporalio/omes/loadgen/kitchensink"
+)
+
+func init() {
+	loadgen.MustRegisterScenario(loadgen.Scenario{
+		Description: "SAW baseline for COGS: single workflow executing one payload activity.",
+		ExecutorFn: func() loadgen.Executor {
+			return loadgen.KitchenSinkExecutor{
+				TestInput: &kitchensink.TestInput{
+					WorkflowInput: &kitchensink.WorkflowInput{
+						InitialActions: []*kitchensink.ActionSet{
+							saaCogsSAWActionSet(),
+						},
+					},
+				},
+			}
+		},
+	})
+}
+
+func saaCogsSAWActionSet() *kitchensink.ActionSet {
+	return &kitchensink.ActionSet{
+		Actions: []*kitchensink.Action{
+			{
+				Variant: &kitchensink.Action_ExecActivity{
+					ExecActivity: &kitchensink.ExecuteActivityAction{
+						ActivityType: &kitchensink.ExecuteActivityAction_Payload{
+							Payload: &kitchensink.ExecuteActivityAction_PayloadActivity{
+								BytesToReceive: 256,
+								BytesToReturn:  256,
+							},
+						},
+						ScheduleToCloseTimeout: durationpb.New(60 * time.Second),
+						RetryPolicy: &common.RetryPolicy{
+							MaximumAttempts: 1,
+						},
+					},
+				},
+			},
+			{
+				Variant: &kitchensink.Action_ReturnResult{
+					ReturnResult: &kitchensink.ReturnResultAction{
+						ReturnThis: &common.Payload{},
+					},
+				},
+			},
+		},
+	}
+}

From e75708189dbfe061d404f43ea45b8d1e6dc1b8b5 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:15:11 -0400
Subject: [PATCH 05/40] neo(task): Project description (from
 /Users/dan/worktrees/omes/saa-cogs/omes/.task/)

---
 .task/commands.sh |  68 -----------------------
 .task/plan.md     | 135 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 68 deletions(-)
 delete mode 100644 .task/commands.sh
 create mode 100644 .task/plan.md

diff --git a/.task/commands.sh b/.task/commands.sh
deleted file mode 100644
index 8829d79c..00000000
--- a/.task/commands.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/bash
-# SAA COGS experiment: useful commands
-# Cell: s-saa-cogs
-
-CELL="s-saa-cogs"
-# Namespace TBD — confirm with Stephen. Likely one of:
-#   ${CELL}-marathon.e2e
-#   or a custom namespace
-NS="${CELL}-marathon.e2e"
-HOST="${NS}.tmprl-test.cloud:7233"
-
-# ── Local testing ────────────────────────────────────────────────────────────
-# Start local dev server (with standalone activity support)
-temporal server start-dev --headless --log-level warn
-
-# SAW: single-activity workflow (baseline)
-go run ./cmd run-scenario-with-worker \
-  --scenario saa_cogs_saw --language go \
-  --iterations 10 --max-concurrent 5 --run-id saw-local-1
-
-# SAA: standalone activity (no workflow)
-go run ./cmd run-scenario-with-worker \
-  --scenario saa_cogs_saa --language go \
-  --iterations 10 --max-concurrent 5 --run-id saa-local-1
-
-# Rate-limited run (e.g. 10 executions/s for 5 minutes)
-go run ./cmd run-scenario-with-worker \
-  --scenario saa_cogs_saw --language go \
-  --duration 5m --max-iterations-per-second 10 --max-concurrent 100 --run-id saw-rate-1
-
-# ── Cloud cell operations ────────────────────────────────────────────────────
-# Check cell pods
-ct kubectl --context $CELL get pods -n temporal
-
-# Check namespace
-omni admintools --context $CELL -- temporal operator namespace describe $NS
-
-# ── Running against cloud cell ───────────────────────────────────────────────
-# Requires TLS certs or API key. Two options:
-
-# Option A: mTLS
-# TLS_CERT=path/to/cert.pem
-# TLS_KEY=path/to/key.pem
-# go run ./cmd run-scenario \
-#   --scenario saa_cogs_saw \
-#   --server-address $HOST --namespace $NS \
-#   --tls --tls-cert-path $TLS_CERT --tls-key-path $TLS_KEY \
-#   --do-not-register-search-attributes \
-#   --iterations 1 --run-id saw-cloud-1
-
-# Option B: API key
-# go run ./cmd run-scenario \
-#   --scenario saa_cogs_saw \
-#   --server-address $HOST --namespace $NS \
-#   --tls --auth-header "Bearer $API_KEY" \
-#   --do-not-register-search-attributes \
-#   --iterations 1 --run-id saw-cloud-1
-
-# Worker (separate terminal, same auth flags)
-# go run ./cmd run-worker \
-#   --scenario saa_cogs_saw --language go \
-#   --server-address $HOST --namespace $NS \
-#   --tls --tls-cert-path $TLS_CERT --tls-key-path $TLS_KEY \
-#   --run-id saw-cloud-1
-
-# ── Grafana ──────────────────────────────────────────────────────────────────
-# Dashboard: https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs
-# Set cluster variable to: s-saa-cogs
diff --git a/.task/plan.md b/.task/plan.md
new file mode 100644
index 00000000..7b5775a6
--- /dev/null
+++ b/.task/plan.md
@@ -0,0 +1,135 @@
+# Implementation Plan: SAA COGS Load Generation
+
+## Goal
+
+Create two omes scenarios to generate SAW and SAA workloads against cloud cell `s-saa-cogs`, then
+observe metrics on the Grafana dashboard.
+
+## Design
+
+### Scenarios
+
+**`saa_cogs_saw`** — Single Activity Workflow baseline. Uses `KitchenSinkExecutor` with a single
+payload activity (256B in, 256B out), no retry, no heartbeat. Very close to
+`workflow_with_single_noop_activity` but with payload instead of noop.
+
+**`saa_cogs_saa`** — Standalone Activity. Uses `GenericExecutor`. Each iteration calls
+`client.ExecuteActivity()` (the SDK's standalone activity API) with the same payload activity, then
+`handle.Get()` to wait for the result. No workflow involved.
+
+Both use the same task queue (derived from run-id) and the same Go worker (which already registers
+the `payload` activity).
+
+### Why GenericExecutor for SAA
+
+`KitchenSinkExecutor` always starts a kitchen-sink workflow. The SAA scenario must call
+`client.ExecuteActivity` directly — no workflow. `GenericExecutor` gives us the `Execute` function
+hook, plus all the concurrency/rate-limiting/duration infrastructure.
+
+### SDK version
+
+The current `go.temporal.io/sdk v1.40.0` already includes `client.ExecuteActivity` (added in
+v1.40.0, commit `215920a6`). No upgrade needed.
+
+### Activity configuration
+
+Both scenarios use the `payload` activity type (already registered in the Go worker as `"payload"`).
+Arguments: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat. Retry policy
+`MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
+
+## Implementation
+
+### Step 1: Create `scenarios/saa_cogs_saw.go`
+
+```go
+package scenarios
+
+func init() {
+    loadgen.MustRegisterScenario(loadgen.Scenario{
+        Description: "SAW baseline for COGS: single workflow executing one payload activity.",
+        ExecutorFn: func() loadgen.Executor {
+            return loadgen.KitchenSinkExecutor{
+                TestInput: &kitchensink.TestInput{
+                    WorkflowInput: &kitchensink.WorkflowInput{
+                        InitialActions: []*kitchensink.ActionSet{
+                            payloadActivityActionSet(),
+                        },
+                    },
+                },
+            }
+        },
+    })
+}
+```
+
+Where `payloadActivityActionSet()` creates a `PayloadActivity(256, 256, ...)` plus
+`ReturnResultAction`, with `MaximumAttempts: 1`, `ScheduleToCloseTimeout: 60s`.
+
+### Step 2: Create `scenarios/saa_cogs_saa.go`
+
+```go
+package scenarios
+
+func init() {
+    loadgen.MustRegisterScenario(loadgen.Scenario{
+        Description: "SAA for COGS: standalone activity with payload, no workflow.",
+        ExecutorFn: func() loadgen.Executor {
+            return &loadgen.GenericExecutor{
+                Execute: executeSAA,
+            }
+        },
+    })
+}
+
+func executeSAA(ctx context.Context, run *loadgen.Run) error {
+    inputData := make([]byte, 256)
+    handle, err := run.Client.ExecuteActivity(ctx, client.StartActivityOptions{
+        ID:                     fmt.Sprintf("a-%s-%s-%d", run.RunID, run.ExecutionID, run.Iteration),
+        TaskQueue:              run.TaskQueue(),
+        ScheduleToCloseTimeout: 60 * time.Second,
+        RetryPolicy:            &temporal.RetryPolicy{MaximumAttempts: 1},
+    }, "payload", inputData, int32(256))
+    if err != nil {
+        return err
+    }
+    var result []byte
+    return handle.Get(ctx, &result)
+}
+```
+
+This calls `"payload"` by name (string) so the SDK dispatches it to the worker which has it
+registered as `activity.RegisterOptions{Name: "payload"}`.
+
+### Step 3: Create `commands.sh` — useful shell commands
+
+A file with terse comments documenting how to run the scenarios locally and against the cloud cell.
+
+### Step 4: Test locally
+
+Run against local dev server using `run-scenario-with-worker`:
+
+```sh
+# SAW
+go run ./cmd run-scenario-with-worker \
+    --scenario saa_cogs_saw --language go \
+    --iterations 5
+
+# SAA
+go run ./cmd run-scenario-with-worker \
+    --scenario saa_cogs_saa --language go \
+    --iterations 5
+```
+
+### Step 5: Connect to cloud cell
+
+Use `omni admintools` to verify cell state, then obtain credentials. Run scenarios against
+`s-saa-cogs-marathon.e2e.tmprl-test.cloud:7233` with TLS.
+
+## Verification
+
+1. **Build**: `go build ./...` succeeds.
+2. **Lint/vet**: `go vet ./...` succeeds.
+3. **Local test — SAW**: `go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go --iterations 5` completes successfully.
+4. **Local test — SAA**: `go run ./cmd run-scenario-with-worker --scenario saa_cogs_saa --language go --iterations 5` completes successfully.
+5. **List scenarios**: `go run ./cmd list-scenarios` includes both `saa_cogs_saw` and `saa_cogs_saa`.
+6. **Cloud cell proof-of-concept**: Point dashboard at `s-saa-cogs`, run one scenario, observe metrics increase.

From dcad9c7d563120d23bb665dede681a9a7b8c2eac Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:19:17 -0400
Subject: [PATCH 06/40] Plan for SAA COGS load generation scenarios

---
 .task/plan.md             | 128 ++++++++++++++------------------------
 commands.sh               |  51 ---------------
 scenarios/saa_cogs_saa.go |  38 -----------
 scenarios/saa_cogs_saw.go |  58 -----------------
 4 files changed, 45 insertions(+), 230 deletions(-)
 delete mode 100644 commands.sh
 delete mode 100644 scenarios/saa_cogs_saa.go
 delete mode 100644 scenarios/saa_cogs_saw.go

diff --git a/.task/plan.md b/.task/plan.md
index 7b5775a6..300decd9 100644
--- a/.task/plan.md
+++ b/.task/plan.md
@@ -20,11 +20,20 @@ payload activity (256B in, 256B out), no retry, no heartbeat. Very close to
 Both use the same task queue (derived from run-id) and the same Go worker (which already registers
 the `payload` activity).
 
-### Why GenericExecutor for SAA
+### Why different executor types
 
-`KitchenSinkExecutor` always starts a kitchen-sink workflow. The SAA scenario must call
-`client.ExecuteActivity` directly — no workflow. `GenericExecutor` gives us the `Execute` function
-hook, plus all the concurrency/rate-limiting/duration infrastructure.
+`KitchenSinkExecutor` always starts a kitchen-sink workflow — this is inherently what SAW needs.
+SAA must call `client.ExecuteActivity` directly (no workflow). `GenericExecutor` gives us the
+`Execute` function hook for this.
+
+Both executor types share the same iteration-driving machinery: `KitchenSinkExecutor` wraps
+`GenericExecutor`, so concurrency control, rate limiting, and duration handling are identical.
+The only difference between the scenarios is what each iteration *does*, which is exactly the
+variable under test.
+
+The activity configuration (256B payload, no retry, 60s timeout) is specified independently in each
+scenario. These are simple literal values; sharing them via an abstraction would add indirection
+without meaningful deduplication.
 
 ### SDK version
 
@@ -37,99 +46,52 @@ Both scenarios use the `payload` activity type (already registered in the Go wor
 Arguments: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat. Retry policy
 `MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
 
-## Implementation
+## Implementation steps
 
 ### Step 1: Create `scenarios/saa_cogs_saw.go`
 
-```go
-package scenarios
-
-func init() {
-    loadgen.MustRegisterScenario(loadgen.Scenario{
-        Description: "SAW baseline for COGS: single workflow executing one payload activity.",
-        ExecutorFn: func() loadgen.Executor {
-            return loadgen.KitchenSinkExecutor{
-                TestInput: &kitchensink.TestInput{
-                    WorkflowInput: &kitchensink.WorkflowInput{
-                        InitialActions: []*kitchensink.ActionSet{
-                            payloadActivityActionSet(),
-                        },
-                    },
-                },
-            }
-        },
-    })
-}
-```
-
-Where `payloadActivityActionSet()` creates a `PayloadActivity(256, 256, ...)` plus
-`ReturnResultAction`, with `MaximumAttempts: 1`, `ScheduleToCloseTimeout: 60s`.
+`KitchenSinkExecutor` with a single `ActionSet` containing a `PayloadActivity(256, 256)` action
+(with `MaximumAttempts: 1`, `ScheduleToCloseTimeout: 60s`) followed by a `ReturnResultAction`.
 
 ### Step 2: Create `scenarios/saa_cogs_saa.go`
 
-```go
-package scenarios
-
-func init() {
-    loadgen.MustRegisterScenario(loadgen.Scenario{
-        Description: "SAA for COGS: standalone activity with payload, no workflow.",
-        ExecutorFn: func() loadgen.Executor {
-            return &loadgen.GenericExecutor{
-                Execute: executeSAA,
-            }
-        },
-    })
-}
-
-func executeSAA(ctx context.Context, run *loadgen.Run) error {
-    inputData := make([]byte, 256)
-    handle, err := run.Client.ExecuteActivity(ctx, client.StartActivityOptions{
-        ID:                     fmt.Sprintf("a-%s-%s-%d", run.RunID, run.ExecutionID, run.Iteration),
-        TaskQueue:              run.TaskQueue(),
-        ScheduleToCloseTimeout: 60 * time.Second,
-        RetryPolicy:            &temporal.RetryPolicy{MaximumAttempts: 1},
-    }, "payload", inputData, int32(256))
-    if err != nil {
-        return err
-    }
-    var result []byte
-    return handle.Get(ctx, &result)
-}
-```
-
-This calls `"payload"` by name (string) so the SDK dispatches it to the worker which has it
-registered as `activity.RegisterOptions{Name: "payload"}`.
-
-### Step 3: Create `commands.sh` — useful shell commands
-
-A file with terse comments documenting how to run the scenarios locally and against the cloud cell.
+`GenericExecutor` whose `Execute` function:
+1. Calls `run.Client.ExecuteActivity()` with `StartActivityOptions` (ID derived from
+   run/execution/iteration, task queue from `run.TaskQueue()`, same timeout and retry policy as SAW).
+2. Passes activity type `"payload"` by name with `[]byte` (256 zeros) and `int32(256)` as args.
+3. Calls `handle.Get()` to wait for the result.
 
-### Step 4: Test locally
+### Step 3: Create `commands.sh`
 
-Run against local dev server using `run-scenario-with-worker`:
+Useful shell commands with terse comments for:
+- Local testing with `--embedded-server`
+- Cloud cell verification via `ct`
+- Running scenarios against `s-saa-cogs`
 
-```sh
-# SAW
-go run ./cmd run-scenario-with-worker \
-    --scenario saa_cogs_saw --language go \
-    --iterations 5
+### Step 4: Test locally
 
-# SAA
-go run ./cmd run-scenario-with-worker \
-    --scenario saa_cogs_saa --language go \
-    --iterations 5
-```
+- `go build ./...` and `go vet ./...`
+- `go run ./cmd list-scenarios` shows both new scenarios
+- SAW: `go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go --iterations 5 --embedded-server`
+- SAA: same command with `saa_cogs_saa` — will get "Standalone activity is disabled" from the dev
+  server (v1.30.1 doesn't have the feature flag), confirming the code path reaches
+  `StartActivityExecution`. Will succeed on the cloud cell.
 
 ### Step 5: Connect to cloud cell
 
-Use `omni admintools` to verify cell state, then obtain credentials. Run scenarios against
-`s-saa-cogs-marathon.e2e.tmprl-test.cloud:7233` with TLS.
+1. Verify cell: `ct kubectl --context s-saa-cogs get pods -n temporal`
+2. Check namespace: `ct admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e`
+3. Obtain operator TLS certs (from k8s secrets via `ct`, or ask Stephen)
+4. Point Grafana dashboard at `s-saa-cogs`, observe idle state
+5. Run worker + SAW scenario against the cell, observe activity in dashboard
+6. Run worker + SAA scenario, observe activity
 
 ## Verification
 
 1. **Build**: `go build ./...` succeeds.
-2. **Lint/vet**: `go vet ./...` succeeds.
-3. **Local test — SAW**: `go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go --iterations 5` completes successfully.
-4. **Local test — SAA**: `go run ./cmd run-scenario-with-worker --scenario saa_cogs_saa --language go --iterations 5` completes successfully.
-5. **List scenarios**: `go run ./cmd list-scenarios` includes both `saa_cogs_saw` and `saa_cogs_saa`.
-6. **Cloud cell proof-of-concept**: Point dashboard at `s-saa-cogs`, run one scenario, observe metrics increase.
+2. **Lint/vet**: `go vet ./...` clean on our files.
+3. **List scenarios**: `go run ./cmd list-scenarios` includes both `saa_cogs_saw` and `saa_cogs_saa`.
+4. **Local test — SAW**: `run-scenario-with-worker --embedded-server --iterations 5` completes.
+5. **Local test — SAA**: Same command hits `StartActivityExecution` on the server (expected to fail
+   on dev server with "disabled" error; succeeds on cloud cell with CHASM enabled).
+6. **Cloud cell proof-of-concept**: Dashboard shows idle → run scenario → dashboard shows activity.
diff --git a/commands.sh b/commands.sh
deleted file mode 100644
index 7951ce07..00000000
--- a/commands.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env bash
-# SAA COGS experiment — useful commands
-# See .task/plan.md for context.
-
-## --- Local testing (against embedded dev server) ---
-
-# SAW: 5 iterations
-go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go --iterations 5
-
-# SAA: 5 iterations
-go run ./cmd run-scenario-with-worker --scenario saa_cogs_saa --language go --iterations 5
-
-# SAW: sustained 60s at 10 starts/s
-go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go \
-    --duration 60s --max-iterations-per-second 10 --max-concurrent 100
-
-# SAA: sustained 60s at 10 starts/s
-go run ./cmd run-scenario-with-worker --scenario saa_cogs_saa --language go \
-    --duration 60s --max-iterations-per-second 10 --max-concurrent 100
-
-## --- Cloud cell: s-saa-cogs ---
-
-CELL=s-saa-cogs
-NS=${CELL}-marathon.e2e
-HOST=${NS}.tmprl-test.cloud:7233
-
-# Verify cell is alive
-ct kubectl --context $CELL get pods -n temporal
-
-# Check namespace
-omni admintools --context $CELL -- temporal operator namespace describe $NS
-
-# Run worker against cloud cell (in one terminal)
-go run ./cmd run-worker --language go --run-id saa-cogs-test \
-    --server-address $HOST --namespace $NS --tls \
-    --tls-cert-path /tmp/saa-cogs-cert.pem --tls-key-path /tmp/saa-cogs-key.pem
-
-# Run SAW scenario against cloud cell (in another terminal)
-go run ./cmd run-scenario --scenario saa_cogs_saw --run-id saa-cogs-test \
-    --server-address $HOST --namespace $NS --tls \
-    --tls-cert-path /tmp/saa-cogs-cert.pem --tls-key-path /tmp/saa-cogs-key.pem \
-    --iterations 5 --do-not-register-search-attributes
-
-# Run SAA scenario against cloud cell
-go run ./cmd run-scenario --scenario saa_cogs_saa --run-id saa-cogs-test \
-    --server-address $HOST --namespace $NS --tls \
-    --tls-cert-path /tmp/saa-cogs-cert.pem --tls-key-path /tmp/saa-cogs-key.pem \
-    --iterations 5 --do-not-register-search-attributes
-
-# Grafana dashboard
-# https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs?var-cluster=s-saa-cogs
diff --git a/scenarios/saa_cogs_saa.go b/scenarios/saa_cogs_saa.go
deleted file mode 100644
index 1ec05a7b..00000000
--- a/scenarios/saa_cogs_saa.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package scenarios
-
-import (
-	"context"
-	"fmt"
-	"time"
-
-	"go.temporal.io/sdk/client"
-	"go.temporal.io/sdk/temporal"
-
-	"github.com/temporalio/omes/loadgen"
-)
-
-func init() {
-	loadgen.MustRegisterScenario(loadgen.Scenario{
-		Description: "SAA for COGS: standalone activity with payload, no workflow.",
-		ExecutorFn: func() loadgen.Executor {
-			return &loadgen.GenericExecutor{
-				Execute: executeSAA,
-			}
-		},
-	})
-}
-
-func executeSAA(ctx context.Context, run *loadgen.Run) error {
-	inputData := make([]byte, 256)
-	handle, err := run.Client.ExecuteActivity(ctx, client.StartActivityOptions{
-		ID:                     fmt.Sprintf("a-%s-%s-%d", run.RunID, run.ExecutionID, run.Iteration),
-		TaskQueue:              run.TaskQueue(),
-		ScheduleToCloseTimeout: 60 * time.Second,
-		RetryPolicy:            &temporal.RetryPolicy{MaximumAttempts: 1},
-	}, "payload", inputData, int32(256))
-	if err != nil {
-		return fmt.Errorf("failed to start standalone activity: %w", err)
-	}
-	var result []byte
-	return handle.Get(ctx, &result)
-}
diff --git a/scenarios/saa_cogs_saw.go b/scenarios/saa_cogs_saw.go
deleted file mode 100644
index ef4e92d8..00000000
--- a/scenarios/saa_cogs_saw.go
+++ /dev/null
@@ -1,58 +0,0 @@
-package scenarios
-
-import (
-	"time"
-
-	"go.temporal.io/api/common/v1"
-	"google.golang.org/protobuf/types/known/durationpb"
-
-	"github.com/temporalio/omes/loadgen"
-	"github.com/temporalio/omes/loadgen/kitchensink"
-)
-
-func init() {
-	loadgen.MustRegisterScenario(loadgen.Scenario{
-		Description: "SAW baseline for COGS: single workflow executing one payload activity.",
-		ExecutorFn: func() loadgen.Executor {
-			return loadgen.KitchenSinkExecutor{
-				TestInput: &kitchensink.TestInput{
-					WorkflowInput: &kitchensink.WorkflowInput{
-						InitialActions: []*kitchensink.ActionSet{
-							saaCogsSAWActionSet(),
-						},
-					},
-				},
-			}
-		},
-	})
-}
-
-func saaCogsSAWActionSet() *kitchensink.ActionSet {
-	return &kitchensink.ActionSet{
-		Actions: []*kitchensink.Action{
-			{
-				Variant: &kitchensink.Action_ExecActivity{
-					ExecActivity: &kitchensink.ExecuteActivityAction{
-						ActivityType: &kitchensink.ExecuteActivityAction_Payload{
-							Payload: &kitchensink.ExecuteActivityAction_PayloadActivity{
-								BytesToReceive: 256,
-								BytesToReturn:  256,
-							},
-						},
-						ScheduleToCloseTimeout: durationpb.New(60 * time.Second),
-						RetryPolicy: &common.RetryPolicy{
-							MaximumAttempts: 1,
-						},
-					},
-				},
-			},
-			{
-				Variant: &kitchensink.Action_ReturnResult{
-					ReturnResult: &kitchensink.ReturnResultAction{
-						ReturnThis: &common.Payload{},
-					},
-				},
-			},
-		},
-	}
-}

From 9719b5b0b9b91e8fb7ce119b8f81273605ede4aa Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:24:06 -0400
Subject: [PATCH 07/40] Update plan: symmetric GenericExecutor for both
 scenarios

Both SAW and SAA use GenericExecutor with a simple Execute function.
SAW gets a dedicated minimal workflow registered on the existing Go worker.
Reuse existing "payload" activity registration. Drop "cogs" from names.
---
 .task/plan.md | 93 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 54 insertions(+), 39 deletions(-)

diff --git a/.task/plan.md b/.task/plan.md
index 300decd9..08e81f3e 100644
--- a/.task/plan.md
+++ b/.task/plan.md
@@ -9,75 +9,90 @@ observe metrics on the Grafana dashboard.
 
 ### Scenarios
 
-**`saa_cogs_saw`** — Single Activity Workflow baseline. Uses `KitchenSinkExecutor` with a single
-payload activity (256B in, 256B out), no retry, no heartbeat. Very close to
-`workflow_with_single_noop_activity` but with payload instead of noop.
-
-**`saa_cogs_saa`** — Standalone Activity. Uses `GenericExecutor`. Each iteration calls
-`client.ExecuteActivity()` (the SDK's standalone activity API) with the same payload activity, then
-`handle.Get()` to wait for the result. No workflow involved.
+Both scenarios use `GenericExecutor` with a simple `Execute` function. This keeps the
+implementations symmetric — the only difference is what each iteration does, which is exactly the
+variable under test.
 
-Both use the same task queue (derived from run-id) and the same Go worker (which already registers
-the `payload` activity).
+**`saw`** — Single Activity Workflow baseline. Each iteration calls `client.ExecuteWorkflow` with a
+dedicated minimal workflow (`saw`) that executes one payload activity and returns. Then
+`handle.Get()` to wait for the result.
 
-### Why different executor types
+**`saa`** — Standalone Activity. Each iteration calls `client.ExecuteActivity` with the same payload
+activity. Then `handle.Get()` to wait for the result. No workflow involved.
 
-`KitchenSinkExecutor` always starts a kitchen-sink workflow — this is inherently what SAW needs.
-SAA must call `client.ExecuteActivity` directly (no workflow). `GenericExecutor` gives us the
-`Execute` function hook for this.
+Both use the same task queue (derived from run-id) and the same Go worker.
 
-Both executor types share the same iteration-driving machinery: `KitchenSinkExecutor` wraps
-`GenericExecutor`, so concurrency control, rate limiting, and duration handling are identical.
-The only difference between the scenarios is what each iteration *does*, which is exactly the
-variable under test.
+### Worker code
 
-The activity configuration (256B payload, no retry, 60s timeout) is specified independently in each
-scenario. These are simple literal values; sharing them via an abstraction would add indirection
-without meaningful deduplication.
+A dedicated activity (`payload`) and a dedicated workflow (`saw`), both minimal:
 
-### SDK version
+- **`payload` activity**: Takes `[]byte` input and `int32` output size, returns `[]byte` of
+  requested size. (This activity already exists in the kitchen-sink worker as `"payload"` with
+  exactly this signature. We write our own to avoid depending on the kitchen-sink worker.)
+- **`saw` workflow**: Executes the `payload` activity with the input it receives, returns the
+  result. No signals, queries, updates, or other machinery.
 
-The current `go.temporal.io/sdk v1.40.0` already includes `client.ExecuteActivity` (added in
-v1.40.0, commit `215920a6`). No upgrade needed.
+Both are registered on the Go worker alongside the existing kitchen-sink registrations.
 
 ### Activity configuration
 
-Both scenarios use the `payload` activity type (already registered in the Go worker as `"payload"`).
-Arguments: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat. Retry policy
-`MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
+Both scenarios use: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat.
+Retry policy `MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
+
+### SDK version
+
+`go.temporal.io/sdk v1.40.0` already includes `client.ExecuteActivity`. No upgrade needed.
 
 ## Implementation steps
 
-### Step 1: Create `scenarios/saa_cogs_saw.go`
+### Step 1: Add worker code
 
-`KitchenSinkExecutor` with a single `ActionSet` containing a `PayloadActivity(256, 256)` action
-(with `MaximumAttempts: 1`, `ScheduleToCloseTimeout: 60s`) followed by a `ReturnResultAction`.
+In `workers/go/`, add a small file registering:
+- Activity `"payload"` — takes `(ctx, []byte, int32)`, returns `([]byte, error)`
+- Workflow `"saw"` — executes `"payload"` activity with its input, returns result
+
+These are registered on the worker alongside existing kitchen-sink registrations.
+
+**Wait — the existing worker already registers `"payload"` with the same signature.** We should
+reuse that registration rather than duplicate it. The question is whether we also need a separate
+worker binary or can share the existing one. The existing Go worker registers the kitchen-sink
+workflow plus all activities including `"payload"`. For SAW we just need to also register our `saw`
+workflow. For SAA we need no workflow at all — just the `"payload"` activity, which is already
+registered.
+
+Decision: add `saw` workflow registration to the existing Go worker. No new worker binary needed.
+
+### Step 2: Create `scenarios/saw.go`
+
+`GenericExecutor` whose `Execute` function:
+1. Calls `run.Client.ExecuteWorkflow()` starting workflow `"saw"` with the payload input.
+2. Calls `handle.Get()` to wait for result.
 
-### Step 2: Create `scenarios/saa_cogs_saa.go`
+### Step 3: Create `scenarios/saa.go`
 
 `GenericExecutor` whose `Execute` function:
 1. Calls `run.Client.ExecuteActivity()` with `StartActivityOptions` (ID derived from
-   run/execution/iteration, task queue from `run.TaskQueue()`, same timeout and retry policy as SAW).
-2. Passes activity type `"payload"` by name with `[]byte` (256 zeros) and `int32(256)` as args.
-3. Calls `handle.Get()` to wait for the result.
+   run/execution/iteration, task queue from `run.TaskQueue()`, same timeout and retry policy).
+2. Passes activity type `"payload"` by name with `[]byte` (256 zeros) and `int32(256)`.
+3. Calls `handle.Get()` to wait for result.
 
-### Step 3: Create `commands.sh`
+### Step 4: Create `commands.sh`
 
 Useful shell commands with terse comments for:
 - Local testing with `--embedded-server`
 - Cloud cell verification via `ct`
 - Running scenarios against `s-saa-cogs`
 
-### Step 4: Test locally
+### Step 5: Test locally
 
 - `go build ./...` and `go vet ./...`
 - `go run ./cmd list-scenarios` shows both new scenarios
-- SAW: `go run ./cmd run-scenario-with-worker --scenario saa_cogs_saw --language go --iterations 5 --embedded-server`
-- SAA: same command with `saa_cogs_saa` — will get "Standalone activity is disabled" from the dev
+- SAW: `go run ./cmd run-scenario-with-worker --scenario saw --language go --iterations 5 --embedded-server`
+- SAA: same command with `saa` — will get "Standalone activity is disabled" from the embedded dev
   server (v1.30.1 doesn't have the feature flag), confirming the code path reaches
   `StartActivityExecution`. Will succeed on the cloud cell.
 
-### Step 5: Connect to cloud cell
+### Step 6: Connect to cloud cell
 
 1. Verify cell: `ct kubectl --context s-saa-cogs get pods -n temporal`
 2. Check namespace: `ct admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e`
@@ -90,7 +105,7 @@ Useful shell commands with terse comments for:
 
 1. **Build**: `go build ./...` succeeds.
 2. **Lint/vet**: `go vet ./...` clean on our files.
-3. **List scenarios**: `go run ./cmd list-scenarios` includes both `saa_cogs_saw` and `saa_cogs_saa`.
+3. **List scenarios**: `go run ./cmd list-scenarios` includes both `saw` and `saa`.
 4. **Local test — SAW**: `run-scenario-with-worker --embedded-server --iterations 5` completes.
 5. **Local test — SAA**: Same command hits `StartActivityExecution` on the server (expected to fail
    on dev server with "disabled" error; succeeds on cloud cell with CHASM enabled).

From f414847d0d2e6d23fe6c3f350ce2434e9d1010a9 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:33:11 -0400
Subject: [PATCH 08/40] Update plan: rename scenarios, reuse existing payload
 activity

---
 .task/plan.md | 62 +++++++++++++++++++--------------------------------
 1 file changed, 23 insertions(+), 39 deletions(-)

diff --git a/.task/plan.md b/.task/plan.md
index 08e81f3e..00587a64 100644
--- a/.task/plan.md
+++ b/.task/plan.md
@@ -1,4 +1,4 @@
-# Implementation Plan: SAA COGS Load Generation
+# Implementation Plan: SAA Load Generation
 
 ## Goal
 
@@ -13,31 +13,27 @@ Both scenarios use `GenericExecutor` with a simple `Execute` function. This keep
 implementations symmetric — the only difference is what each iteration does, which is exactly the
 variable under test.
 
-**`saw`** — Single Activity Workflow baseline. Each iteration calls `client.ExecuteWorkflow` with a
-dedicated minimal workflow (`saw`) that executes one payload activity and returns. Then
-`handle.Get()` to wait for the result.
+**`workflow_with_single_activity`** — Each iteration calls `client.ExecuteWorkflow` with a dedicated
+minimal workflow that executes one `payload` activity and returns. Then `handle.Get()`.
 
-**`saa`** — Standalone Activity. Each iteration calls `client.ExecuteActivity` with the same payload
-activity. Then `handle.Get()` to wait for the result. No workflow involved.
+**`standalone_activity`** — Each iteration calls `client.ExecuteActivity` with the same `payload`
+activity. Then `handle.Get()`. No workflow involved.
 
 Both use the same task queue (derived from run-id) and the same Go worker.
 
 ### Worker code
 
-A dedicated activity (`payload`) and a dedicated workflow (`saw`), both minimal:
+Reuse the existing `payload` activity at [kitchen_sink.go:511-516](workers/go/kitchensink/kitchen_sink.go#L511-L516),
+already registered as `"payload"` at [worker.go:105](workers/go/worker/worker.go#L105).
 
-- **`payload` activity**: Takes `[]byte` input and `int32` output size, returns `[]byte` of
-  requested size. (This activity already exists in the kitchen-sink worker as `"payload"` with
-  exactly this signature. We write our own to avoid depending on the kitchen-sink worker.)
-- **`saw` workflow**: Executes the `payload` activity with the input it receives, returns the
-  result. No signals, queries, updates, or other machinery.
-
-Both are registered on the Go worker alongside the existing kitchen-sink registrations.
+Add one new workflow: a minimal function that executes the `payload` activity with its input and
+returns the result. Register it on the existing Go worker at [worker.go:102](workers/go/worker/worker.go#L102)
+alongside the existing registrations. No new worker binary needed.
 
 ### Activity configuration
 
-Both scenarios use: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat.
-Retry policy `MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
+Both scenarios: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat.
+`MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
 
 ### SDK version
 
@@ -45,30 +41,18 @@ Retry policy `MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
 
 ## Implementation steps
 
-### Step 1: Add worker code
-
-In `workers/go/`, add a small file registering:
-- Activity `"payload"` — takes `(ctx, []byte, int32)`, returns `([]byte, error)`
-- Workflow `"saw"` — executes `"payload"` activity with its input, returns result
-
-These are registered on the worker alongside existing kitchen-sink registrations.
-
-**Wait — the existing worker already registers `"payload"` with the same signature.** We should
-reuse that registration rather than duplicate it. The question is whether we also need a separate
-worker binary or can share the existing one. The existing Go worker registers the kitchen-sink
-workflow plus all activities including `"payload"`. For SAW we just need to also register our `saw`
-workflow. For SAA we need no workflow at all — just the `"payload"` activity, which is already
-registered.
+### Step 1: Add workflow to worker
 
-Decision: add `saw` workflow registration to the existing Go worker. No new worker binary needed.
+Add a small file under `workers/go/` with the minimal workflow function. Register it in
+[worker.go](workers/go/worker/worker.go) alongside existing registrations.
 
-### Step 2: Create `scenarios/saw.go`
+### Step 2: Create `scenarios/workflow_with_single_activity.go`
 
 `GenericExecutor` whose `Execute` function:
-1. Calls `run.Client.ExecuteWorkflow()` starting workflow `"saw"` with the payload input.
+1. Calls `run.Client.ExecuteWorkflow()` starting the new workflow with the payload input.
 2. Calls `handle.Get()` to wait for result.
 
-### Step 3: Create `scenarios/saa.go`
+### Step 3: Create `scenarios/standalone_activity.go`
 
 `GenericExecutor` whose `Execute` function:
 1. Calls `run.Client.ExecuteActivity()` with `StartActivityOptions` (ID derived from
@@ -87,9 +71,9 @@ Useful shell commands with terse comments for:
 
 - `go build ./...` and `go vet ./...`
 - `go run ./cmd list-scenarios` shows both new scenarios
-- SAW: `go run ./cmd run-scenario-with-worker --scenario saw --language go --iterations 5 --embedded-server`
-- SAA: same command with `saa` — will get "Standalone activity is disabled" from the embedded dev
-  server (v1.30.1 doesn't have the feature flag), confirming the code path reaches
+- SAW: `go run ./cmd run-scenario-with-worker --scenario workflow_with_single_activity --language go --iterations 5 --embedded-server`
+- SAA: same command with `standalone_activity` — will get "Standalone activity is disabled" from the
+  embedded dev server (v1.30.1 doesn't have the feature flag), confirming the code path reaches
   `StartActivityExecution`. Will succeed on the cloud cell.
 
 ### Step 6: Connect to cloud cell
@@ -105,8 +89,8 @@ Useful shell commands with terse comments for:
 
 1. **Build**: `go build ./...` succeeds.
 2. **Lint/vet**: `go vet ./...` clean on our files.
-3. **List scenarios**: `go run ./cmd list-scenarios` includes both `saw` and `saa`.
+3. **List scenarios**: `go run ./cmd list-scenarios` includes both names.
 4. **Local test — SAW**: `run-scenario-with-worker --embedded-server --iterations 5` completes.
 5. **Local test — SAA**: Same command hits `StartActivityExecution` on the server (expected to fail
    on dev server with "disabled" error; succeeds on cloud cell with CHASM enabled).
-6. **Cloud cell proof-of-concept**: Dashboard shows idle → run scenario → dashboard shows activity.
+6. **Cloud cell proof-of-concept**: Dashboard shows idle -> run scenario -> dashboard shows activity.

From 786be36a33fb04160871cd0b77bfbf3046ed07b6 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 10:38:31 -0400
Subject: [PATCH 09/40] painting by numbers

---
 .task/plan.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.task/plan.md b/.task/plan.md
index 00587a64..acc2e53d 100644
--- a/.task/plan.md
+++ b/.task/plan.md
@@ -41,6 +41,20 @@ Both scenarios: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No
 
 ## Implementation steps
 
+IMPORTANT: Rather than doing the implementation yourself, please "teach" the user to do the
+implementation themselves. Take a "painting by numbers" approach: Decide on the first component they
+should write, and insert a comment in the code indicating what they should do. Then pause and give
+them a clickable links to the comment, and to any existing prior art in the codebase they might want
+to refer to. Don't output code directly to them. Work with them to complete the stage; review their
+work carefully. Do not consider the stage complete until the work is done to an equal or greater
+standard than you yourself would have achieved. When that stage is completed by them, or with
+further assistance from you, move on to the next component to be implemented and repeat this
+procedure.
+
+Regarding names: we will not use "cogs" anywhere in omes code itself. Conceptually, the omes code is
+defining SAW and SAA workloads. What those are used for (to run an experiment) and why (COGS
+investigation) is not the concern of the omes code.
+
 ### Step 1: Add workflow to worker
 
 Add a small file under `workers/go/` with the minimal workflow function. Register it in

From a29561e78ad36e7567cd8a988b30a54b93d568b0 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 11:35:45 -0400
Subject: [PATCH 10/40] Document Payload activity

---
 workers/go/kitchensink/kitchen_sink.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workers/go/kitchensink/kitchen_sink.go b/workers/go/kitchensink/kitchen_sink.go
index 21551c18..fa977496 100644
--- a/workers/go/kitchensink/kitchen_sink.go
+++ b/workers/go/kitchensink/kitchen_sink.go
@@ -508,6 +508,7 @@ func Noop(_ context.Context) error {
 	return nil
 }
 
+// Payload is an activity that takes arbitrary bytes input and returns a bytes result of size `bytesToReturn`.
 func Payload(_ context.Context, inputData []byte, bytesToReturn int32) ([]byte, error) {
 	output := make([]byte, bytesToReturn)
 	//goland:noinspection GoDeprecation -- This is fine. We don't need crypto security.

From cc4df14eab96c6e060eb7840218fa82d5e3c3bae Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 11:37:13 -0400
Subject: [PATCH 11/40] SingleActivityWorkflow: implement and register

---
 workers/go/singleactivityworkflow/workflow.go | 20 +++++++++++++++++++
 workers/go/worker/worker.go                   |  2 ++
 2 files changed, 22 insertions(+)
 create mode 100644 workers/go/singleactivityworkflow/workflow.go

diff --git a/workers/go/singleactivityworkflow/workflow.go b/workers/go/singleactivityworkflow/workflow.go
new file mode 100644
index 00000000..95c394c7
--- /dev/null
+++ b/workers/go/singleactivityworkflow/workflow.go
@@ -0,0 +1,20 @@
+package singleactivityworkflow
+
+import (
+	"time"
+
+	"go.temporal.io/sdk/temporal"
+	"go.temporal.io/sdk/workflow"
+)
+
+func SingleActivityWorkflow(ctx workflow.Context, input []byte, outputSize int32) ([]byte, error) {
+	var output []byte
+	err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{
+		StartToCloseTimeout: 5 * time.Second,
+		RetryPolicy:         &temporal.RetryPolicy{MaximumAttempts: 1},
+	}), "payload", input, outputSize).Get(ctx, &output)
+	if err != nil {
+		return nil, err
+	}
+	return output, nil
+}
diff --git a/workers/go/worker/worker.go b/workers/go/worker/worker.go
index 40ffc757..aaab61cc 100644
--- a/workers/go/worker/worker.go
+++ b/workers/go/worker/worker.go
@@ -9,6 +9,7 @@ import (
 	"github.com/temporalio/omes/workers/go/ebbandflow"
 	"github.com/temporalio/omes/workers/go/kitchensink"
 	"github.com/temporalio/omes/workers/go/schedulerstress"
+	"github.com/temporalio/omes/workers/go/singleactivityworkflow"
 	"go.temporal.io/sdk/activity"
 	"go.temporal.io/sdk/client"
 	"go.temporal.io/sdk/worker"
@@ -112,6 +113,7 @@ func runWorkers(client client.Client, taskQueues []string, options clioptions.Wo
 			w.RegisterActivity(&ebbFlowActivities)
 			w.RegisterWorkflowWithOptions(schedulerstress.NoopScheduledWorkflow, workflow.RegisterOptions{Name: "NoopScheduledWorkflow"})
 			w.RegisterWorkflowWithOptions(schedulerstress.SleepScheduledWorkflow, workflow.RegisterOptions{Name: "SleepScheduledWorkflow"})
+			w.RegisterWorkflow(singleactivityworkflow.SingleActivityWorkflow)
 			w.RegisterNexusService(service)
 			errCh <- w.Run(worker.InterruptCh())
 		}()

From 83b4610d6ef2f820a5d1ca1c4e85f19b50628361 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 12:06:37 -0400
Subject: [PATCH 12/40] Register scenario

---
 scenarios/standalone_activity.go           | 50 ++++++++++++++++++++++
 scenarios/workflow_with_single_activity.go | 32 ++++++++++++++
 workers/go/worker/worker.go                |  2 +-
 3 files changed, 83 insertions(+), 1 deletion(-)
 create mode 100644 scenarios/standalone_activity.go
 create mode 100644 scenarios/workflow_with_single_activity.go

diff --git a/scenarios/standalone_activity.go b/scenarios/standalone_activity.go
new file mode 100644
index 00000000..d55547f0
--- /dev/null
+++ b/scenarios/standalone_activity.go
@@ -0,0 +1,50 @@
+package scenarios
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/temporalio/omes/loadgen"
+	"go.temporal.io/sdk/client"
+	"go.temporal.io/sdk/temporal"
+)
+
+func init() {
+	loadgen.MustRegisterScenario(loadgen.Scenario{
+		Description: "Run a standalone activity. The activity takes in some bytes and returns some bytes. " +
+			"It never retries or heartbeats.",
+		ExecutorFn: func() loadgen.Executor {
+			return &loadgen.GenericExecutor{
+				Execute: func(ctx context.Context, r *loadgen.Run) error {
+					payloadSize := r.ScenarioOptionInt("payload-size", 0)
+					handle, err := r.Client.ExecuteActivity(
+						ctx,
+						activityOptions(r),
+						"payload",
+						make([]byte, payloadSize),
+						int32(payloadSize),
+					)
+					if err != nil {
+						return err
+					}
+					return handle.Get(ctx, nil)
+				},
+			}
+		},
+	})
+}
+
+func activityOptions(r *loadgen.Run) client.StartActivityOptions {
+	return client.StartActivityOptions{
+		ID: fmt.Sprintf(
+			"a-%s-%s-%d",
+			r.RunID,
+			r.ExecutionID,
+			r.Iteration,
+		),
+		TaskQueue:           r.TaskQueue(),
+		StartToCloseTimeout: 5 * time.Second,
+		RetryPolicy:         &temporal.RetryPolicy{MaximumAttempts: 1},
+	}
+}
diff --git a/scenarios/workflow_with_single_activity.go b/scenarios/workflow_with_single_activity.go
new file mode 100644
index 00000000..ef47a1de
--- /dev/null
+++ b/scenarios/workflow_with_single_activity.go
@@ -0,0 +1,32 @@
+package scenarios
+
+import (
+	"context"
+
+	"github.com/temporalio/omes/loadgen"
+)
+
+func init() {
+	loadgen.MustRegisterScenario(loadgen.Scenario{
+		Description: "Run a single-activity workflow. It takes in some bytes, passes them to an " +
+			"activity, and returns the bytes returned by the activity. The activity never retries or heartbeats.",
+		ExecutorFn: func() loadgen.Executor {
+			return &loadgen.GenericExecutor{
+				Execute: func(ctx context.Context, r *loadgen.Run) error {
+					payloadSize := r.ScenarioOptionInt("payload-size", 0)
+					handle, err := r.Client.ExecuteWorkflow(
+						ctx,
+						r.DefaultStartWorkflowOptions(),
+						"singleActivityWorkflow",
+						make([]byte, payloadSize),
+						int32(payloadSize),
+					)
+					if err != nil {
+						return err
+					}
+					return handle.Get(ctx, nil)
+				},
+			}
+		},
+	})
+}
diff --git a/workers/go/worker/worker.go b/workers/go/worker/worker.go
index aaab61cc..fb1bc791 100644
--- a/workers/go/worker/worker.go
+++ b/workers/go/worker/worker.go
@@ -113,7 +113,7 @@ func runWorkers(client client.Client, taskQueues []string, options clioptions.Wo
 			w.RegisterActivity(&ebbFlowActivities)
 			w.RegisterWorkflowWithOptions(schedulerstress.NoopScheduledWorkflow, workflow.RegisterOptions{Name: "NoopScheduledWorkflow"})
 			w.RegisterWorkflowWithOptions(schedulerstress.SleepScheduledWorkflow, workflow.RegisterOptions{Name: "SleepScheduledWorkflow"})
-			w.RegisterWorkflow(singleactivityworkflow.SingleActivityWorkflow)
+			w.RegisterWorkflowWithOptions(singleactivityworkflow.SingleActivityWorkflow, workflow.RegisterOptions{Name: "singleActivityWorkflow"})
 			w.RegisterNexusService(service)
 			errCh <- w.Run(worker.InterruptCh())
 		}()

From a77c193dde975d97b3d00e2c8151d3f9a560a8e3 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 12:51:06 -0400
Subject: [PATCH 13/40] commands

---
 commands.sh | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 commands.sh

diff --git a/commands.sh b/commands.sh
new file mode 100644
index 00000000..b214633c
--- /dev/null
+++ b/commands.sh
@@ -0,0 +1,41 @@
+# Shell commands for SAA/SAW load generation scenarios.
+
+# --- Local testing (embedded dev server) ---
+
+go run ./cmd run-scenario-with-worker --scenario workflow_with_single_activity --language go --iterations 5 --embedded-server --option payload-size=1024
+
+go run ./cmd run-scenario-with-worker --scenario standalone_activity --language go --iterations 5 --embedded-server --option payload-size=1024
+
+# --- Cloud cell: s-saa-cogs ---
+
+# List all k8s namespaces on the cell
+ct kubectl --context s-saa-cogs get namespaces
+
+# Verify cell is up
+ct kubectl --context s-saa-cogs get pods -n temporal
+
+# Check namespace
+ct admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e
+
+# Run worker (in one terminal)
+go run ./workers/go --task-queue omes \
+  --server-address TODO \
+  --namespace s-saa-cogs-marathon.e2e \
+  --tls-cert-path TODO \
+  --tls-key-path TODO
+
+# Run SAW scenario
+go run ./cmd run-scenario --scenario workflow_with_single_activity \
+  --server-address TODO \
+  --namespace s-saa-cogs-marathon.e2e \
+  --tls-cert-path TODO \
+  --tls-key-path TODO \
+  --iterations 100 --max-concurrent 10
+
+# Run SAA scenario
+go run ./cmd run-scenario --scenario standalone_activity \
+  --server-address TODO \
+  --namespace s-saa-cogs-marathon.e2e \
+  --tls-cert-path TODO \
+  --tls-key-path TODO \
+  --iterations 100 --max-concurrent 10

From b17844e4a0f8a7acf209b10b662a2d5c37fecdcb Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 14:02:45 -0400
Subject: [PATCH 14/40] Add cell info

---
 .task/task.md | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/.task/task.md b/.task/task.md
index b9c71259..b9b47cc3 100644
--- a/.task/task.md
+++ b/.task/task.md
@@ -716,6 +716,45 @@ Your task is to help me design and build the omes-based tooling that we will use
 (1) Add any missing omes functionality that will be needed in order to be able to use omes to generate the SAA and SAW load for the experiments.
 (2) Run the experiments against the cloud cell that Stephen has prepared: its name is s-saa-cogs.
 
+Stephen linked to the 'scaffold' run that created the cell. I see it had the following input:
+
+{
+  "CellConfig": {
+    "Identity": {
+      "Location": {
+        "CloudProvider": "aws",
+        "AccountID": "124355634071",
+        "Region": "us-west-2"
+      },
+      "ID": "s-saa-cogs"
+    },
+    "Template": "v5-aws-dev",
+    "ServerVersion": "v3.151.9_oss1.31.0_151.6",
+    "AgentVersion": "v3.151.9_oss1.31.0_151.6",
+    "WebVersion": "v2.47.0",
+    "GoCanaryVersion": "v1.35.0",
+    "ComponentVersion": "v2026-03-20.00",
+    "WalVersion": "v10.0.3",
+    "EnableMetering": false
+  },
+  "FailurePolicy": 1
+}
+
+and output:
+
+{
+  "Cell": {
+    "Identity": {
+      "Location": {
+        "CloudProvider": "aws",
+        "AccountID": "124355634071",
+        "Region": "us-west-2"
+      },
+      "ID": "s-saa-cogs"
+    }
+  }
+}
+
 I am not familiar with performing operations against cloud cells, so you will need to resarch and help me during this. But we have several good resources: study the contents of the 'oncall' and 'runbooks' repos, and also use the /agent-slack skill. You also have Notion and Temporal Docs MCP. Use the more modern 'ct' rather than its alias 'omni'.
 
 Initial grafana dashboard JSON is at .task/saacogs.json.

From 4a1458d6fdc0443339b469a41eff3120c6cbe833 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 14:18:02 -0400
Subject: [PATCH 15/40] Annotate cell support URL as potentially unavailable
 for dev cells

---
 commands.sh | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/commands.sh b/commands.sh
index b214633c..bdca80ab 100644
--- a/commands.sh
+++ b/commands.sh
@@ -8,14 +8,26 @@ go run ./cmd run-scenario-with-worker --scenario standalone_activity --language
 
 # --- Cloud cell: s-saa-cogs ---
 
+# Cell support page: https://cloud.temporal.io/support/cells/s-saa-cogs
+#   (may not resolve for dev-template cells; use ct ocld / ct kubectl instead)
+# K8s access: ct k9s --readonly --context s-saa-cogs
+
 # List all k8s namespaces on the cell
 ct kubectl --context s-saa-cogs get namespaces
 
 # Verify cell is up
 ct kubectl --context s-saa-cogs get pods -n temporal
 
-# Check namespace
-ct admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e
+# List Temporal namespaces on this cell
+# Web: https://cloud.temporal.io/support/cells/s-saa-cogs (if cell is registered in UI)
+ct ocld namespace db list --active-cluster s-saa-cogs
+
+# Grafana dashboards
+# Overview: https://grafana.tmprl-internal.cloud/d/e613c827-243e-4759-a5ca-3e334201c124/temporal-cloud-overview
+# By namespace: https://grafana.tmprl-internal.cloud/d/iyRCOBD4z/temporal-cloud-external-metrics-by-namespace
+# Frontend: https://grafana.tmprl-internal.cloud/d/SxRYJXZMz/frontend
+# Matching: https://grafana.tmprl-internal.cloud/d/wuh-8uZGk/matching
+# History: https://grafana.tmprl-internal.cloud/d/jh_LXEin2/history
 
 # Run worker (in one terminal)
 go run ./workers/go --task-queue omes \

From f529e1f4e915bc0de7b425c5d56457ca0032c086 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 14:21:28 -0400
Subject: [PATCH 16/40] Fix cell support URL: s-saa* cells use
 staging.thundergun.io

---
 commands.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/commands.sh b/commands.sh
index bdca80ab..42daa729 100644
--- a/commands.sh
+++ b/commands.sh
@@ -8,8 +8,8 @@ go run ./cmd run-scenario-with-worker --scenario standalone_activity --language
 
 # --- Cloud cell: s-saa-cogs ---
 
-# Cell support page: https://cloud.temporal.io/support/cells/s-saa-cogs
-#   (may not resolve for dev-template cells; use ct ocld / ct kubectl instead)
+# Cell support page: https://staging.thundergun.io/support/cells/s-saa-cogs
+#   (s-saa* cells are staging/test cells on thundergun, not cloud.temporal.io)
 # K8s access: ct k9s --readonly --context s-saa-cogs
 
 # List all k8s namespaces on the cell
@@ -19,7 +19,7 @@ ct kubectl --context s-saa-cogs get namespaces
 ct kubectl --context s-saa-cogs get pods -n temporal
 
 # List Temporal namespaces on this cell
-# Web: https://cloud.temporal.io/support/cells/s-saa-cogs (if cell is registered in UI)
+# Web: https://staging.thundergun.io/support/cells/s-saa-cogs
 ct ocld namespace db list --active-cluster s-saa-cogs
 
 # Grafana dashboards

From 479db2d8ea1ee8bc6e0e83d7f4686a0e0e3f58f1 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 14:25:23 -0400
Subject: [PATCH 17/40] Use ct admintools to list Temporal namespaces

---
 commands.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commands.sh b/commands.sh
index 42daa729..d937b9dc 100644
--- a/commands.sh
+++ b/commands.sh
@@ -20,7 +20,7 @@ ct kubectl --context s-saa-cogs get pods -n temporal
 
 # List Temporal namespaces on this cell
 # Web: https://staging.thundergun.io/support/cells/s-saa-cogs
-ct ocld namespace db list --active-cluster s-saa-cogs
+ct admintools --context s-saa-cogs -- temporal operator namespace list -o json
 
 # Grafana dashboards
 # Overview: https://grafana.tmprl-internal.cloud/d/e613c827-243e-4759-a5ca-3e334201c124/temporal-cloud-overview

From 9d0fcc8af6c13b40bdf47cc3e143315d2e99e8e8 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 14:30:22 -0400
Subject: [PATCH 18/40] Update namespace to match created saa-cogs namespace

---
 commands.sh | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/commands.sh b/commands.sh
index d937b9dc..a6d7e3c6 100644
--- a/commands.sh
+++ b/commands.sh
@@ -22,6 +22,9 @@ ct kubectl --context s-saa-cogs get pods -n temporal
 # Web: https://staging.thundergun.io/support/cells/s-saa-cogs
 ct admintools --context s-saa-cogs -- temporal operator namespace list -o json
 
+# Create a namespace
+ct admintools --context s-saa-cogs -- temporal operator namespace create saa-cogs
+
 # Grafana dashboards
 # Overview: https://grafana.tmprl-internal.cloud/d/e613c827-243e-4759-a5ca-3e334201c124/temporal-cloud-overview
 # By namespace: https://grafana.tmprl-internal.cloud/d/iyRCOBD4z/temporal-cloud-external-metrics-by-namespace
@@ -32,14 +35,14 @@ ct admintools --context s-saa-cogs -- temporal operator namespace list -o json
 # Run worker (in one terminal)
 go run ./workers/go --task-queue omes \
   --server-address TODO \
-  --namespace s-saa-cogs-marathon.e2e \
+  --namespace saa-cogs \
   --tls-cert-path TODO \
   --tls-key-path TODO
 
 # Run SAW scenario
 go run ./cmd run-scenario --scenario workflow_with_single_activity \
   --server-address TODO \
-  --namespace s-saa-cogs-marathon.e2e \
+  --namespace saa-cogs \
   --tls-cert-path TODO \
   --tls-key-path TODO \
   --iterations 100 --max-concurrent 10
@@ -47,7 +50,7 @@ go run ./cmd run-scenario --scenario workflow_with_single_activity \
 # Run SAA scenario
 go run ./cmd run-scenario --scenario standalone_activity \
   --server-address TODO \
-  --namespace s-saa-cogs-marathon.e2e \
+  --namespace saa-cogs \
   --tls-cert-path TODO \
   --tls-key-path TODO \
   --iterations 100 --max-concurrent 10

From c3f80772ba5eec9640bfae1e664c8775714c421d Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 19:01:23 -0400
Subject: [PATCH 19/40] Add units to dashboard

---
 .task/saacogs.json | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.task/saacogs.json b/.task/saacogs.json
index 8654452f..bb30e263 100644
--- a/.task/saacogs.json
+++ b/.task/saacogs.json
@@ -221,7 +221,7 @@
           "refId": "A"
         }
       ],
-      "title": "Frontend RPC by method",
+      "title": "Frontend RPC by method (req/s)",
       "type": "timeseries"
     },
     {
@@ -279,7 +279,7 @@
           "refId": "A"
         }
       ],
-      "title": "History RPC by method",
+      "title": "History RPC by method (req/s)",
       "type": "timeseries"
     },
     {
@@ -337,7 +337,7 @@
           "refId": "A"
         }
       ],
-      "title": "Matching RPC by method",
+      "title": "Matching RPC by method (req/s)",
       "type": "timeseries"
     },
     {
@@ -403,7 +403,7 @@
           "refId": "B"
         }
       ],
-      "title": "Astra writes by table",
+      "title": "Astra writes by table (req/s)",
       "description": "Validate r_Cass = 3/7 for writes. cassandra_query filtered to verb!=select; cassandra_batch is always writes.",
       "type": "timeseries"
     },
@@ -462,7 +462,7 @@
           "refId": "A"
         }
       ],
-      "title": "Astra reads by table",
+      "title": "Astra reads by table (req/s)",
       "description": "Reads are not expected to differ much between SAW and SAA (similar caching, ~1 read on creation).",
       "type": "timeseries"
     },
@@ -521,7 +521,7 @@
           "refId": "A"
         }
       ],
-      "title": "WAL operation rate by type",
+      "title": "WAL operation rate by type (ops/s)",
       "description": "Covers both reads and writes (no separate write-only metric). Expect HISTORY_EVENT_WAL activity for SAW only; both use MUTABLE_STATE_WAL.",
       "type": "timeseries"
     },
@@ -580,7 +580,7 @@
           "refId": "A"
         }
       ],
-      "title": "Visibility persistence rate by operation",
+      "title": "Visibility persistence rate by operation (ops/s)",
       "description": "OSS visibility_persistence_requests counter, tagged by operation (RecordWorkflowExecutionStarted, RecordWorkflowExecutionClosed, UpsertWorkflowExecution, DeleteWorkflowExecution).",
       "type": "timeseries"
     },
@@ -647,7 +647,7 @@
           "refId": "B"
         }
       ],
-      "title": "Sync vs async match rate",
+      "title": "Sync vs async match rate (matches/s)",
       "description": "Health check on experimental conditions. Async match means tasks went through persistence/backlog rather than being dispatched directly to a waiting poller.",
       "type": "timeseries"
     }

From 6c1ac63787ae45829c888c04d086332a4d2380e3 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 19:09:47 -0400
Subject: [PATCH 20/40] commands

---
 commands.sh | 83 +++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 59 insertions(+), 24 deletions(-)

diff --git a/commands.sh b/commands.sh
index a6d7e3c6..91d6a86f 100644
--- a/commands.sh
+++ b/commands.sh
@@ -13,7 +13,7 @@ go run ./cmd run-scenario-with-worker --scenario standalone_activity --language
 # K8s access: ct k9s --readonly --context s-saa-cogs
 
 # List all k8s namespaces on the cell
-ct kubectl --context s-saa-cogs get namespaces
+ct kubectl --context s-saa-cogs get namespaces -o json
 
 # Verify cell is up
 ct kubectl --context s-saa-cogs get pods -n temporal
@@ -22,35 +22,70 @@ ct kubectl --context s-saa-cogs get pods -n temporal
 # Web: https://staging.thundergun.io/support/cells/s-saa-cogs
 ct admintools --context s-saa-cogs -- temporal operator namespace list -o json
 
-# Create a namespace
-ct admintools --context s-saa-cogs -- temporal operator namespace create saa-cogs
+# Create namespace pinned to the cell
+ct ocld test namespace create \
+  --namespace saa-cogs-4.temporal-dev \
+  --region us-west-2 \
+  --cloud-provider aws \
+  --retention 1 \
+  --placement-override-cell-id s-saa-cogs \
+  --auth-method api_key
 
-# Grafana dashboards
-# Overview: https://grafana.tmprl-internal.cloud/d/e613c827-243e-4759-a5ca-3e334201c124/temporal-cloud-overview
-# By namespace: https://grafana.tmprl-internal.cloud/d/iyRCOBD4z/temporal-cloud-external-metrics-by-namespace
-# Frontend: https://grafana.tmprl-internal.cloud/d/SxRYJXZMz/frontend
-# Matching: https://grafana.tmprl-internal.cloud/d/wuh-8uZGk/matching
-# History: https://grafana.tmprl-internal.cloud/d/jh_LXEin2/history
+# DNS should resolve
+nslookup saa-cogs-4.temporal-dev.tmprl-test.cloud
+
+# Namespace should appear on the cell
+ct admintools --context s-saa-cogs -- temporal operator namespace list
+
+# Namespace should be active with API key auth enabled
+# Output contains grpcAddress
+ct ocld test cloud-apis namespaces get -n saa-cogs-4.temporal-dev
+
+export TEMPORAL_API_KEY=xxx
+export TEMPORAL_ADDRESS=us-west-2.aws.api.tmprl-test.cloud:7233
+export TEMPORAL_NAMESPACE=saa-cogs-4.temporal-dev
+export TEMPORAL_TLS=true
+export TEMPORAL_TLS_DISABLE_HOST_VERIFICATION=true
+
+ct admintools --context s-saa-cogs -- temporal operator search-attribute create \
+  --namespace saa-cogs-4.temporal-dev --name OmesExecutionID --type Keyword
 
 # Run worker (in one terminal)
-go run ./workers/go --task-queue omes \
-  --server-address TODO \
-  --namespace saa-cogs \
-  --tls-cert-path TODO \
-  --tls-key-path TODO
+go run ./cmd run-worker \
+  --run-id run-1 \
+  --scenario workflow_with_single_activity \
+  --language go \
+  --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
+  --namespace saa-cogs-4.temporal-dev \
+  --tls \
+  --disable-tls-host-verification \
+  --auth-header "Bearer $TEMPORAL_API_KEY"
 
 # Run SAW scenario
 go run ./cmd run-scenario --scenario workflow_with_single_activity \
-  --server-address TODO \
-  --namespace saa-cogs \
-  --tls-cert-path TODO \
-  --tls-key-path TODO \
-  --iterations 100 --max-concurrent 10
+  --run-id run-1 \
+  --iterations 100 --max-concurrent 10 \
+  --do-not-register-search-attributes \
+  --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
+  --namespace saa-cogs-4.temporal-dev \
+  --tls \
+  --disable-tls-host-verification \
+  --auth-header "Bearer $TEMPORAL_API_KEY"
 
 # Run SAA scenario
 go run ./cmd run-scenario --scenario standalone_activity \
-  --server-address TODO \
-  --namespace saa-cogs \
-  --tls-cert-path TODO \
-  --tls-key-path TODO \
-  --iterations 100 --max-concurrent 10
+  --run-id run-1 \
+  --iterations 100 --max-concurrent 10 \
+  --do-not-register-search-attributes \
+  --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
+  --namespace saa-cogs-4.temporal-dev \
+  --tls \
+  --disable-tls-host-verification \
+  --auth-header "Bearer $TEMPORAL_API_KEY"
+
+# Grafana dashboards
+# Overview: https://grafana.tmprl-internal.cloud/d/e613c827-243e-4759-a5ca-3e334201c124/temporal-cloud-overview
+# By namespace: https://grafana.tmprl-internal.cloud/d/iyRCOBD4z/temporal-cloud-external-metrics-by-namespace
+# Frontend: https://grafana.tmprl-internal.cloud/d/SxRYJXZMz/frontend
+# Matching: https://grafana.tmprl-internal.cloud/d/wuh-8uZGk/matching
+# History: https://grafana.tmprl-internal.cloud/d/jh_LXEin2/history

From d983a466cefe1f9e19a871c36e3d26ad14abdf59 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 21:53:10 -0400
Subject: [PATCH 21/40] Don't do anything with search attributes

---
 scenarios/workflow_with_single_activity.go | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/scenarios/workflow_with_single_activity.go b/scenarios/workflow_with_single_activity.go
index ef47a1de..6930a329 100644
--- a/scenarios/workflow_with_single_activity.go
+++ b/scenarios/workflow_with_single_activity.go
@@ -2,8 +2,10 @@ package scenarios
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/temporalio/omes/loadgen"
+	"go.temporal.io/sdk/client"
 )
 
 func init() {
@@ -16,7 +18,7 @@ func init() {
 					payloadSize := r.ScenarioOptionInt("payload-size", 0)
 					handle, err := r.Client.ExecuteWorkflow(
 						ctx,
-						r.DefaultStartWorkflowOptions(),
+						startWorkflowOptions(r),
 						"singleActivityWorkflow",
 						make([]byte, payloadSize),
 						int32(payloadSize),
@@ -30,3 +32,16 @@ func init() {
 		},
 	})
 }
+
+func startWorkflowOptions(r *loadgen.Run) client.StartWorkflowOptions {
+	return client.StartWorkflowOptions{
+		TaskQueue: loadgen.TaskQueueForRun(r.RunID),
+		ID: fmt.Sprintf(
+			"w-%s-%s-%d",
+			r.RunID,
+			r.ExecutionID,
+			r.Iteration,
+		),
+		WorkflowExecutionErrorWhenAlreadyStarted: !r.Configuration.IgnoreAlreadyStarted,
+	}
+}

From 39e0aa0e1c50b9807e77220f282717adc86f0be2 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 21:54:32 -0400
Subject: [PATCH 22/40] commands

---
 commands.sh | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/commands.sh b/commands.sh
index 91d6a86f..8aa1bf1e 100644
--- a/commands.sh
+++ b/commands.sh
@@ -50,21 +50,16 @@ export TEMPORAL_TLS_DISABLE_HOST_VERIFICATION=true
 ct admintools --context s-saa-cogs -- temporal operator search-attribute create \
   --namespace saa-cogs-4.temporal-dev --name OmesExecutionID --type Keyword
 
-# Run worker (in one terminal)
-go run ./cmd run-worker \
-  --run-id run-1 \
+# SAW
+go run ./cmd run-scenario-with-worker \
   --scenario workflow_with_single_activity \
   --language go \
-  --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
-  --namespace saa-cogs-4.temporal-dev \
-  --tls \
-  --disable-tls-host-verification \
-  --auth-header "Bearer $TEMPORAL_API_KEY"
-
-# Run SAW scenario
-go run ./cmd run-scenario --scenario workflow_with_single_activity \
   --run-id run-1 \
-  --iterations 100 --max-concurrent 10 \
+  --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
+  --worker-max-concurrent-workflow-pollers 40 \
+  --worker-max-concurrent-workflow-tasks 500 \
+  --worker-max-concurrent-activity-pollers 40 \
+  --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
   --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
   --namespace saa-cogs-4.temporal-dev \
@@ -72,10 +67,14 @@ go run ./cmd run-scenario --scenario workflow_with_single_activity \
   --disable-tls-host-verification \
   --auth-header "Bearer $TEMPORAL_API_KEY"
 
-# Run SAA scenario
-go run ./cmd run-scenario --scenario standalone_activity \
+# SAA
+go run ./cmd run-scenario-with-worker \
+  --scenario standalone_activity \
+  --language go \
   --run-id run-1 \
-  --iterations 100 --max-concurrent 10 \
+  --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
+  --worker-max-concurrent-activity-pollers 40 \
+  --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
   --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
   --namespace saa-cogs-4.temporal-dev \
@@ -89,3 +88,8 @@ go run ./cmd run-scenario --scenario standalone_activity \
 # Frontend: https://grafana.tmprl-internal.cloud/d/SxRYJXZMz/frontend
 # Matching: https://grafana.tmprl-internal.cloud/d/wuh-8uZGk/matching
 # History: https://grafana.tmprl-internal.cloud/d/jh_LXEin2/history
+
+ct ocld test dynamic-config namespace get -n saa-cogs-4.temporal-dev
+
+# 88ms RTT
+for i in $(seq 10); do curl -s -o /dev/null -w '%{time_connect}\n' https://us-west-2.aws.api.tmprl-test.cloud:7233; done

From 771f2f6f55c0de4142bf3df7bb1bba278855db89 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Sat, 21 Mar 2026 21:55:17 -0400
Subject: [PATCH 23/40] Fix Go worker flag names to match other language
 workers

WorkerOptions.FlagSet() now takes a prefix parameter. The outer CLI
passes "worker-" (so users write --worker-max-concurrent-activities),
and passthrough() strips it for the subprocess. The Go worker binary
passes "" so it accepts the stripped names, matching dotnet/python/
typescript/java workers.
---
 cmd/cli/run_worker.go       |  2 +-
 cmd/clioptions/worker.go    | 19 ++++++++++---------
 workers/go/worker/worker.go |  2 +-
 workers/run.go              |  2 +-
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/cmd/cli/run_worker.go b/cmd/cli/run_worker.go
index 272b49fa..a1b1b869 100644
--- a/cmd/cli/run_worker.go
+++ b/cmd/cli/run_worker.go
@@ -53,7 +53,7 @@ func (r *workerRunner) addCLIFlags(fs *pflag.FlagSet) {
 	fs.IntVar(&r.TaskQueueIndexSuffixEnd, "task-queue-suffix-index-end", 0, "Inclusive end for task queue suffix range")
 	fs.AddFlagSet(r.ClientOptions.FlagSet())
 	fs.AddFlagSet(r.MetricsOptions.FlagSet("worker-"))
-	fs.AddFlagSet(r.WorkerOptions.FlagSet())
+	fs.AddFlagSet(r.WorkerOptions.FlagSet("worker-"))
 }
 
 func (r *workerRunner) preRun() {
diff --git a/cmd/clioptions/worker.go b/cmd/clioptions/worker.go
index 82f4cc9e..2df633dd 100644
--- a/cmd/clioptions/worker.go
+++ b/cmd/clioptions/worker.go
@@ -20,19 +20,20 @@ type WorkerOptions struct {
 }
 
 // FlagSet adds the relevant flags to populate the options struct and returns a pflag.FlagSet.
-func (m *WorkerOptions) FlagSet() *pflag.FlagSet {
+// The prefix is prepended to each flag name (e.g. "worker-" for the outer CLI, "" for worker binaries).
+func (m *WorkerOptions) FlagSet(prefix string) *pflag.FlagSet {
 	if m.fs != nil {
 		return m.fs
 	}
 	m.fs = pflag.NewFlagSet("worker_options", pflag.ExitOnError)
 	m.fs.StringVar(&m.BuildID, "build-id", "", "Build ID")
-	m.fs.IntVar(&m.MaxConcurrentActivityPollers, "worker-max-concurrent-activity-pollers", 0, "Max concurrent activity pollers")
-	m.fs.IntVar(&m.MaxConcurrentWorkflowPollers, "worker-max-concurrent-workflow-pollers", 0, "Max concurrent workflow pollers")
-	m.fs.IntVar(&m.MaxConcurrentActivities, "worker-max-concurrent-activities", 0, "Max concurrent activities")
-	m.fs.IntVar(&m.MaxConcurrentWorkflowTasks, "worker-max-concurrent-workflow-tasks", 0, "Max concurrent workflow tasks")
-	m.fs.IntVar(&m.ActivityPollerAutoscaleMax, "worker-activity-poller-autoscale-max", 0, "Max for activity poller autoscaling (overrides max-concurrent-activity-pollers")
-	m.fs.IntVar(&m.WorkflowPollerAutoscaleMax, "worker-workflow-poller-autoscale-max", 0, "Max for workflow poller autoscaling (overrides max-concurrent-workflow-pollers")
-	m.fs.Float64Var(&m.WorkerActivitiesPerSecond, "worker-activities-per-second", 0, "Per-worker activity rate limit")
-	m.fs.BoolVar(&m.ErrOnUnimplemented, "worker-err-on-unimplemented", false, "Fail on unimplemented actions (currently this only applies to concurrent client actions)")
+	m.fs.IntVar(&m.MaxConcurrentActivityPollers, prefix+"max-concurrent-activity-pollers", 0, "Max concurrent activity pollers")
+	m.fs.IntVar(&m.MaxConcurrentWorkflowPollers, prefix+"max-concurrent-workflow-pollers", 0, "Max concurrent workflow pollers")
+	m.fs.IntVar(&m.MaxConcurrentActivities, prefix+"max-concurrent-activities", 0, "Max concurrent activities")
+	m.fs.IntVar(&m.MaxConcurrentWorkflowTasks, prefix+"max-concurrent-workflow-tasks", 0, "Max concurrent workflow tasks")
+	m.fs.IntVar(&m.ActivityPollerAutoscaleMax, prefix+"activity-poller-autoscale-max", 0, "Max for activity poller autoscaling (overrides max-concurrent-activity-pollers)")
+	m.fs.IntVar(&m.WorkflowPollerAutoscaleMax, prefix+"workflow-poller-autoscale-max", 0, "Max for workflow poller autoscaling (overrides max-concurrent-workflow-pollers)")
+	m.fs.Float64Var(&m.WorkerActivitiesPerSecond, prefix+"activities-per-second", 0, "Per-worker activity rate limit")
+	m.fs.BoolVar(&m.ErrOnUnimplemented, prefix+"err-on-unimplemented", false, "Fail on unimplemented actions (currently this only applies to concurrent client actions)")
 	return m.fs
 }
diff --git a/workers/go/worker/worker.go b/workers/go/worker/worker.go
index fb1bc791..a221c7e0 100644
--- a/workers/go/worker/worker.go
+++ b/workers/go/worker/worker.go
@@ -138,7 +138,7 @@ func Main() {
 	cmd.Flags().AddFlagSet(app.loggingOptions.FlagSet())
 	cmd.Flags().AddFlagSet(app.clientOptions.FlagSet())
 	cmd.Flags().AddFlagSet(app.metricsOptions.FlagSet(""))
-	cmd.Flags().AddFlagSet(app.workerOptions.FlagSet())
+	cmd.Flags().AddFlagSet(app.workerOptions.FlagSet(""))
 	cmd.Flags().StringVarP(&app.taskQueue, "task-queue", "q", "omes", "Task queue to use")
 	cmd.Flags().IntVar(&app.taskQueueIndexSuffixStart,
 		"task-queue-suffix-index-start", 0, "Inclusive start for task queue suffix range")
diff --git a/workers/run.go b/workers/run.go
index 9ca84150..feb639ba 100644
--- a/workers/run.go
+++ b/workers/run.go
@@ -131,7 +131,7 @@ func (r *Runner) Run(ctx context.Context, baseDir string) error {
 	args = append(args, passthrough(r.ClientOptions.FlagSet(), "")...)
 	args = append(args, passthrough(r.LoggingOptions.FlagSet(), "")...)
 	args = append(args, passthroughExcluding(r.MetricsOptions.FlagSet("worker-"), "worker-", "process-metrics-address", "metrics-version-tag")...)
-	args = append(args, passthrough(r.WorkerOptions.FlagSet(), "worker-")...)
+	args = append(args, passthrough(r.WorkerOptions.FlagSet("worker-"), "worker-")...)
 
 	cmd, err := prog.NewCommand(context.Background(), args...)
 	if err != nil {

From 4a5bafa9119fafc48f86b67f9de1e97609bf2513 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Mon, 23 Mar 2026 13:19:42 -0400
Subject: [PATCH 24/40] Clean up commands

---
 commands.sh | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/commands.sh b/commands.sh
index 8aa1bf1e..c0e53038 100644
--- a/commands.sh
+++ b/commands.sh
@@ -3,13 +3,11 @@
 # --- Local testing (embedded dev server) ---
 
 go run ./cmd run-scenario-with-worker --scenario workflow_with_single_activity --language go --iterations 5 --embedded-server --option payload-size=1024
-
 go run ./cmd run-scenario-with-worker --scenario standalone_activity --language go --iterations 5 --embedded-server --option payload-size=1024
 
 # --- Cloud cell: s-saa-cogs ---
 
 # Cell support page: https://staging.thundergun.io/support/cells/s-saa-cogs
-#   (s-saa* cells are staging/test cells on thundergun, not cloud.temporal.io)
 # K8s access: ct k9s --readonly --context s-saa-cogs
 
 # List all k8s namespaces on the cell
@@ -55,7 +53,7 @@ go run ./cmd run-scenario-with-worker \
   --scenario workflow_with_single_activity \
   --language go \
   --run-id run-1 \
-  --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
+  --duration 1h --max-concurrent 500 --max-iterations-per-second 50 \
   --worker-max-concurrent-workflow-pollers 40 \
   --worker-max-concurrent-workflow-tasks 500 \
   --worker-max-concurrent-activity-pollers 40 \
@@ -82,13 +80,6 @@ go run ./cmd run-scenario-with-worker \
   --disable-tls-host-verification \
   --auth-header "Bearer $TEMPORAL_API_KEY"
 
-# Grafana dashboards
-# Overview: https://grafana.tmprl-internal.cloud/d/e613c827-243e-4759-a5ca-3e334201c124/temporal-cloud-overview
-# By namespace: https://grafana.tmprl-internal.cloud/d/iyRCOBD4z/temporal-cloud-external-metrics-by-namespace
-# Frontend: https://grafana.tmprl-internal.cloud/d/SxRYJXZMz/frontend
-# Matching: https://grafana.tmprl-internal.cloud/d/wuh-8uZGk/matching
-# History: https://grafana.tmprl-internal.cloud/d/jh_LXEin2/history
-
 ct ocld test dynamic-config namespace get -n saa-cogs-4.temporal-dev
 
 # 88ms RTT

From 312bbe33950a1cd33899015a4d79f3037e29fe83 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Mon, 23 Mar 2026 13:20:48 -0400
Subject: [PATCH 25/40] cleanup

---
 .task/AGENTS.md    |   3 -
 .task/plan.md      | 110 -------
 .task/research.md  | 159 ----------
 .task/saacogs.json | 703 -----------------------------------------
 .task/task.md      | 764 ---------------------------------------------
 AGENTS.md          |   1 -
 CLAUDE.md          |   1 -
 7 files changed, 1741 deletions(-)
 delete mode 100644 .task/AGENTS.md
 delete mode 100644 .task/plan.md
 delete mode 100644 .task/research.md
 delete mode 100644 .task/saacogs.json
 delete mode 100644 .task/task.md
 delete mode 120000 AGENTS.md
 delete mode 120000 CLAUDE.md

diff --git a/.task/AGENTS.md b/.task/AGENTS.md
deleted file mode 100644
index ff9f7032..00000000
--- a/.task/AGENTS.md
+++ /dev/null
@@ -1,3 +0,0 @@
-At the start of the conversation output the following so that I know you've read these instructions:
-
-📖 omes:saa-cogs
diff --git a/.task/plan.md b/.task/plan.md
deleted file mode 100644
index acc2e53d..00000000
--- a/.task/plan.md
+++ /dev/null
@@ -1,110 +0,0 @@
-# Implementation Plan: SAA Load Generation
-
-## Goal
-
-Create two omes scenarios to generate SAW and SAA workloads against cloud cell `s-saa-cogs`, then
-observe metrics on the Grafana dashboard.
-
-## Design
-
-### Scenarios
-
-Both scenarios use `GenericExecutor` with a simple `Execute` function. This keeps the
-implementations symmetric — the only difference is what each iteration does, which is exactly the
-variable under test.
-
-**`workflow_with_single_activity`** — Each iteration calls `client.ExecuteWorkflow` with a dedicated
-minimal workflow that executes one `payload` activity and returns. Then `handle.Get()`.
-
-**`standalone_activity`** — Each iteration calls `client.ExecuteActivity` with the same `payload`
-activity. Then `handle.Get()`. No workflow involved.
-
-Both use the same task queue (derived from run-id) and the same Go worker.
-
-### Worker code
-
-Reuse the existing `payload` activity at [kitchen_sink.go:511-516](workers/go/kitchensink/kitchen_sink.go#L511-L516),
-already registered as `"payload"` at [worker.go:105](workers/go/worker/worker.go#L105).
-
-Add one new workflow: a minimal function that executes the `payload` activity with its input and
-returns the result. Register it on the existing Go worker at [worker.go:102](workers/go/worker/worker.go#L102)
-alongside the existing registrations. No new worker binary needed.
-
-### Activity configuration
-
-Both scenarios: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat.
-`MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
-
-### SDK version
-
-`go.temporal.io/sdk v1.40.0` already includes `client.ExecuteActivity`. No upgrade needed.
-
-## Implementation steps
-
-IMPORTANT: Rather than doing the implementation yourself, please "teach" the user to do the
-implementation themselves. Take a "painting by numbers" approach: Decide on the first component they
-should write, and insert a comment in the code indicating what they should do. Then pause and give
-them a clickable links to the comment, and to any existing prior art in the codebase they might want
-to refer to. Don't output code directly to them. Work with them to complete the stage; review their
-work carefully. Do not consider the stage complete until the work is done to an equal or greater
-standard than you yourself would have achieved. When that stage is completed by them, or with
-further assistance from you, move on to the next component to be implemented and repeat this
-procedure.
-
-Regarding names: we will not use "cogs" anywhere in omes code itself. Conceptually, the omes code is
-defining SAW and SAA workloads. What those are used for (to run an experiment) and why (COGS
-investigation) is not the concern of the omes code.
-
-### Step 1: Add workflow to worker
-
-Add a small file under `workers/go/` with the minimal workflow function. Register it in
-[worker.go](workers/go/worker/worker.go) alongside existing registrations.
-
-### Step 2: Create `scenarios/workflow_with_single_activity.go`
-
-`GenericExecutor` whose `Execute` function:
-1. Calls `run.Client.ExecuteWorkflow()` starting the new workflow with the payload input.
-2. Calls `handle.Get()` to wait for result.
-
-### Step 3: Create `scenarios/standalone_activity.go`
-
-`GenericExecutor` whose `Execute` function:
-1. Calls `run.Client.ExecuteActivity()` with `StartActivityOptions` (ID derived from
-   run/execution/iteration, task queue from `run.TaskQueue()`, same timeout and retry policy).
-2. Passes activity type `"payload"` by name with `[]byte` (256 zeros) and `int32(256)`.
-3. Calls `handle.Get()` to wait for result.
-
-### Step 4: Create `commands.sh`
-
-Useful shell commands with terse comments for:
-- Local testing with `--embedded-server`
-- Cloud cell verification via `ct`
-- Running scenarios against `s-saa-cogs`
-
-### Step 5: Test locally
-
-- `go build ./...` and `go vet ./...`
-- `go run ./cmd list-scenarios` shows both new scenarios
-- SAW: `go run ./cmd run-scenario-with-worker --scenario workflow_with_single_activity --language go --iterations 5 --embedded-server`
-- SAA: same command with `standalone_activity` — will get "Standalone activity is disabled" from the
-  embedded dev server (v1.30.1 doesn't have the feature flag), confirming the code path reaches
-  `StartActivityExecution`. Will succeed on the cloud cell.
-
-### Step 6: Connect to cloud cell
-
-1. Verify cell: `ct kubectl --context s-saa-cogs get pods -n temporal`
-2. Check namespace: `ct admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e`
-3. Obtain operator TLS certs (from k8s secrets via `ct`, or ask Stephen)
-4. Point Grafana dashboard at `s-saa-cogs`, observe idle state
-5. Run worker + SAW scenario against the cell, observe activity in dashboard
-6. Run worker + SAA scenario, observe activity
-
-## Verification
-
-1. **Build**: `go build ./...` succeeds.
-2. **Lint/vet**: `go vet ./...` clean on our files.
-3. **List scenarios**: `go run ./cmd list-scenarios` includes both names.
-4. **Local test — SAW**: `run-scenario-with-worker --embedded-server --iterations 5` completes.
-5. **Local test — SAA**: Same command hits `StartActivityExecution` on the server (expected to fail
-   on dev server with "disabled" error; succeeds on cloud cell with CHASM enabled).
-6. **Cloud cell proof-of-concept**: Dashboard shows idle -> run scenario -> dashboard shows activity.
diff --git a/.task/research.md b/.task/research.md
deleted file mode 100644
index cab82543..00000000
--- a/.task/research.md
+++ /dev/null
@@ -1,159 +0,0 @@
-# SAA COGS Experiment: Research & Design
-
-## 1. Current State of Omes
-
-### Architecture
-Omes is a load generation framework for Temporal. Scenarios are Go files in `scenarios/` that
-register via `init()` → `loadgen.MustRegisterScenario()`. The scenario name comes from the
-filename. Execution flows:
-
-1. `run-scenario` command: dials Temporal, runs scenario executor
-2. `run-worker` command: starts a worker (Go/Python/etc) polling a task queue
-3. `run-scenario-with-worker`: runs both together (local development)
-
-### Executor Types
-- `GenericExecutor`: takes a `func(ctx, *Run) error` — most flexible
-- `KitchenSinkExecutor`: wraps `GenericExecutor`, starts kitchen-sink workflows with configurable action sequences
-- `FuzzExecutor`: random action generation
-
-### Existing Standalone Activity Support
-Branch `standalone-activity` (commit `efbbb7f`) adds SAA to the `throughput_stress` scenario as
-an *optional extra activity within a workflow*. The implementation:
-
-1. Proto: `StandaloneActivity` message in `kitchen_sink.proto`
-2. Helper: `StandaloneActivity()` in `loadgen/kitchensink/helpers.go` creates an action
-3. Worker: `ExecuteStandaloneActivity()` in `workers/go/kitchensink/kitchen_sink.go` — called as a
-   *workflow activity* that internally calls `StartActivityExecution` + `PollActivityExecution`
-4. Scenario: enabled via `--option enable-standalone-activity=true`
-
-**Critical observation**: This existing support executes SAA *from within a workflow activity*.
-That is useful for testing SAA functionality but **not** for the COGS experiment. For COGS, we need
-to run SAA directly from the load generator (no workflow involved) so that the only server-side
-work is the standalone activity execution itself.
-
-## 2. What We Need for the COGS Experiment
-
-### Two New Scenarios
-
-**`saa_cogs_saw`** — Single Activity Workflow (the baseline):
-- Each iteration: start a workflow that executes one activity (payload: 256B in, 256B out), then completes
-- This is very close to `workflow_with_single_noop_activity` but with a payload activity
-
-**`saa_cogs_saa`** — Standalone Activity:
-- Each iteration: call `StartActivityExecution` directly from the load generator, then
-  `PollActivityExecution` to wait for the result
-- No workflow involved
-- Same activity (payload: 256B in, 256B out) and task queue
-- **Requires a `GenericExecutor`** since `KitchenSinkExecutor` always starts workflows
-
-Both scenarios must use the same worker (the Go worker with `payload` activity registered).
-
-### Key Design Decisions
-
-1. **Activity type**: `payload` with 256B input, 256B output (matching the COGS analysis)
-2. **No heartbeat, no retry** (matching the COGS analysis; retry max_attempts=1)
-3. **Fixed start rate** (not fixed concurrency) — controls for latency differences
-4. **Same task queue** for both scenarios — ensures same worker setup
-5. **Sync match preferred** — the COGS analysis assumes sync match; verify via metrics
-
-### SAA Load Generator Implementation
-
-The SAA scenario needs to call gRPC APIs directly. Looking at the existing
-`ExecuteStandaloneActivity` in the worker code (`workers/go/kitchensink/kitchen_sink.go:46-120`),
-we have a working reference. The scenario version should:
-
-1. Use `client.WorkflowService()` to get the gRPC client
-2. Call `StartActivityExecution` with the activity config
-3. Call `PollActivityExecution` to wait for completion
-4. This is a `GenericExecutor` with a custom `Execute` function
-
-## 3. Cloud Cell Operations
-
-### Connecting to a Cloud Cell
-
-From `bench-go.mdx`, the namespace format for test cells is `{cellId}-marathon.e2e` and the host
-is `{cellId}-marathon.e2e.tmprl-test.cloud:7233`. For our cell `s-saa-cogs`:
-- Namespace: `s-saa-cogs-marathon.e2e` (to be confirmed — Stephen may have set up differently)
-- Host: `s-saa-cogs-marathon.e2e.tmprl-test.cloud:7233`
-
-Omes connects via:
-```
---server-address <host:port> --namespace <ns> --tls --tls-cert-path <cert> --tls-key-path <key>
-```
-
-Or with API key auth:
-```
---server-address <host:port> --namespace <ns> --tls --auth-header "Bearer <api-key>"
-```
-
-### Running omes against a cloud cell
-
-Two options:
-1. **Local**: Run `go run ./cmd run-scenario` and `go run ./cmd run-worker` locally, connecting to
-   the cloud cell via TLS. Simplest for proof-of-concept. Higher latency (network round trip to
-   cloud) but the load generator itself isn't on the critical path for COGS measurement.
-2. **K8s pod**: Deploy omes worker as a pod on the cell's k8s cluster. Lower latency, more
-   realistic. The bench-go runbook shows this is the standard approach. Uses `omni scaffold` with
-   `--benchgo-enabled` or manual deployment.
-
-For initial proof-of-concept: run locally. For the actual experiment: deploy to k8s.
-
-### Grafana Dashboard
-
-The dashboard at `https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs` uses a `$cluster`
-variable. Set `cluster=s-saa-cogs` to point at our cell.
-
-### Cell Setup Verification
-
-Use `ct` / `omni` to verify cell state:
-```sh
-# Check cell status
-ct kubectl --context s-saa-cogs get pods -n temporal
-
-# Check namespace exists
-omni admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e
-```
-
-### Search Attributes
-
-Cloud cells cannot register search attributes via the SDK — they must be registered via the
-control plane. The `--do-not-register-search-attributes` flag exists for this. We should use it,
-and register `OmesExecutionID` separately if needed. For the simple COGS scenarios, we may not
-even need search attributes.
-
-## 4. Implementation Plan
-
-### Phase 1: Minimal Scenarios (omes code changes)
-
-1. Create `scenarios/saa_cogs_saw.go` — SAW scenario using `KitchenSinkExecutor`
-2. Create `scenarios/saa_cogs_saa.go` — SAA scenario using `GenericExecutor` with direct gRPC calls
-3. Both share config: payload size, start rate, duration
-
-### Phase 2: Local Proof-of-Concept
-
-1. Test both scenarios against local Temporal server
-2. Run `go run ./cmd run-scenario-with-worker` for SAW
-3. For SAA: run worker separately, then scenario (since SAA doesn't use workflows but the
-   worker still needs to poll for activity tasks)
-
-### Phase 3: Cloud Cell Connection
-
-1. Obtain credentials for s-saa-cogs cell
-2. Verify dashboard shows idle state
-3. Run a single SAW iteration and observe metrics
-4. Run a single SAA iteration and observe metrics
-
-### Phase 4: Full Experiment
-
-1. Deploy omes worker to cloud cell k8s
-2. Run SAW at target start rate for target duration
-3. Wait for cool-down, collect metrics
-4. Run SAA at same start rate for same duration
-5. Collect and compare metrics
-
-## 5. Open Questions
-
-- What namespace(s) are configured on s-saa-cogs?
-- How do we obtain TLS certs or API keys for the cell? (Check oncall or runbooks repos or search slack)
-- Does the cell have CHASM standalone activities enabled? (Dynamic config flag)
-- Worker deployment: should we use the existing bench-go infrastructure or deploy omes directly?
diff --git a/.task/saacogs.json b/.task/saacogs.json
deleted file mode 100644
index bb30e263..00000000
--- a/.task/saacogs.json
+++ /dev/null
@@ -1,703 +0,0 @@
-{
-  "annotations": {
-    "list": [
-      {
-        "builtIn": 1,
-        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
-        "enable": true,
-        "hide": true,
-        "iconColor": "rgba(0, 211, 255, 1)",
-        "name": "Annotations & Alerts",
-        "type": "dashboard"
-      }
-    ]
-  },
-  "editable": true,
-  "fiscalYearStartMonth": 0,
-  "graphTooltip": 0,
-  "links": [],
-  "panels": [
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
-      "id": 1,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"frontend-.*\"})",
-          "legendFormat": "frontend",
-          "range": true,
-          "refId": "A"
-        },
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"})",
-          "legendFormat": "history",
-          "range": true,
-          "refId": "B"
-        },
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"matching-.*\"})",
-          "legendFormat": "matching",
-          "range": true,
-          "refId": "C"
-        }
-      ],
-      "title": "CPU per service (vCPU)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          },
-          "unit": "decbytes"
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
-      "id": 2,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"frontend\",workload_type=\"deployment\"}))",
-          "legendFormat": "frontend",
-          "range": true,
-          "refId": "A"
-        },
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
-          "legendFormat": "history",
-          "range": true,
-          "refId": "B"
-        },
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"matching\",workload_type=\"deployment\"}))",
-          "legendFormat": "matching",
-          "range": true,
-          "refId": "C"
-        }
-      ],
-      "title": "Memory per service (p50 working set)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 8 },
-      "id": 3,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"frontend\"}[$__rate_interval]))",
-          "legendFormat": "{{operation}}",
-          "range": true,
-          "refId": "A"
-        }
-      ],
-      "title": "Frontend RPC by method (req/s)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 8 },
-      "id": 4,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval]))",
-          "legendFormat": "{{operation}}",
-          "range": true,
-          "refId": "A"
-        }
-      ],
-      "title": "History RPC by method (req/s)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 8 },
-      "id": 5,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"matching\"}[$__rate_interval]))",
-          "legendFormat": "{{operation}}",
-          "range": true,
-          "refId": "A"
-        }
-      ],
-      "title": "Matching RPC by method (req/s)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
-      "id": 6,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (table)(rate(cassandra_query{cluster=\"$cluster\",verb!=\"select\"}[$__rate_interval]))",
-          "legendFormat": "query: {{table}}",
-          "range": true,
-          "refId": "A"
-        },
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (table)(rate(cassandra_batch{cluster=\"$cluster\"}[$__rate_interval]))",
-          "legendFormat": "batch: {{table}}",
-          "range": true,
-          "refId": "B"
-        }
-      ],
-      "title": "Astra writes by table (req/s)",
-      "description": "Validate r_Cass = 3/7 for writes. cassandra_query filtered to verb!=select; cassandra_batch is always writes.",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
-      "id": 7,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (table)(rate(cassandra_query{cluster=\"$cluster\",verb=\"select\"}[$__rate_interval]))",
-          "legendFormat": "{{table}}",
-          "range": true,
-          "refId": "A"
-        }
-      ],
-      "title": "Astra reads by table (req/s)",
-      "description": "Reads are not expected to differ much between SAW and SAA (similar caching, ~1 read on creation).",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
-      "id": 8,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (walType)(rate(wal_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
-          "legendFormat": "{{walType}}",
-          "range": true,
-          "refId": "A"
-        }
-      ],
-      "title": "WAL operation rate by type (ops/s)",
-      "description": "Covers both reads and writes (no separate write-only metric). Expect HISTORY_EVENT_WAL activity for SAW only; both use MUTABLE_STATE_WAL.",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
-      "id": 9,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum by (operation)(rate(visibility_persistence_requests{cluster=\"$cluster\"}[$__rate_interval]))",
-          "legendFormat": "{{operation}}",
-          "range": true,
-          "refId": "A"
-        }
-      ],
-      "title": "Visibility persistence rate by operation (ops/s)",
-      "description": "OSS visibility_persistence_requests counter, tagged by operation (RecordWorkflowExecutionStarted, RecordWorkflowExecutionClosed, UpsertWorkflowExecution, DeleteWorkflowExecution).",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${datasource}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 0,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "auto",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": null },
-              { "color": "red", "value": 80 }
-            ]
-          }
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 32 },
-      "id": 10,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "11.4.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum(rate(syncmatch_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
-          "legendFormat": "sync match",
-          "range": true,
-          "refId": "A"
-        },
-        {
-          "datasource": { "type": "prometheus", "uid": "${datasource}" },
-          "editorMode": "code",
-          "expr": "sum(rate(asyncmatch_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
-          "legendFormat": "async match",
-          "range": true,
-          "refId": "B"
-        }
-      ],
-      "title": "Sync vs async match rate (matches/s)",
-      "description": "Health check on experimental conditions. Async match means tasks went through persistence/backlog rather than being dispatched directly to a waiting poller.",
-      "type": "timeseries"
-    }
-  ],
-  "preload": false,
-  "schemaVersion": 40,
-  "tags": [],
-  "templating": {
-    "list": [
-      {
-        "current": { "text": "prod", "value": "prod" },
-        "name": "env",
-        "options": [
-          { "selected": true, "text": "prod", "value": "prod" },
-          { "selected": false, "text": "dev", "value": "test" }
-        ],
-        "query": "prod : prod, dev : test",
-        "type": "custom"
-      },
-      {
-        "current": { "text": "prod thanos", "value": "af7fe237-211e-413e-9723-41a73886bcbb" },
-        "hide": 2,
-        "includeAll": false,
-        "name": "datasource",
-        "options": [],
-        "query": "prometheus",
-        "refresh": 1,
-        "regex": "${env:text}.*",
-        "type": "datasource"
-      },
-      {
-        "current": {},
-        "datasource": { "type": "prometheus", "uid": "${datasource}" },
-        "definition": "label_values(restarts,cluster)",
-        "includeAll": false,
-        "label": "Cluster",
-        "name": "cluster",
-        "options": [],
-        "query": { "query": "label_values(restarts,cluster)", "refId": "StandardVariableQuery" },
-        "refresh": 2,
-        "regex": "",
-        "type": "query"
-      }
-    ]
-  },
-  "time": { "from": "now-3h", "to": "now" },
-  "timepicker": {},
-  "timezone": "utc",
-  "title": "SAA COGS",
-  "uid": "saacogs",
-  "version": 1,
-  "weekStart": ""
-}
diff --git a/.task/task.md b/.task/task.md
deleted file mode 100644
index b9b47cc3..00000000
--- a/.task/task.md
+++ /dev/null
@@ -1,764 +0,0 @@
-For background context, please study the following documents carefully:
-
-START_DOCUMENT------------------------------------------------------------------------------
-# Temporal Activity Execution & saas-temporal Cloud Persistence: Implementation Overview
-
-## Part 1: Activity Execution Models in Temporal Server
-
-### 1.1 CHASM Standalone Activities (`chasm/lib/activity/`)
-
-CHASM standalone activities are first-class, independently-scheduled executions outside workflow context. They use **mutable state only** -- no history events.
-
-#### State Machine
-
-States defined in `chasm/lib/activity/proto/v1/activity_state.proto`:
-
-```
-UNSPECIFIED
-  → SCHEDULED
-    → STARTED
-      → COMPLETED (terminal)
-      → FAILED (terminal)
-      → CANCEL_REQUESTED → CANCELED (terminal)
-      → TIMED_OUT (terminal)
-      → TERMINATED (terminal)
-    → CANCEL_REQUESTED → CANCELED (terminal)
-    → TIMED_OUT (terminal)
-    → TERMINATED (terminal)
-    → SCHEDULED (retry path)
-```
-
-Lifecycle states (`activity.go:95-107`):
-- `LifecycleStateRunning`: SCHEDULED, STARTED, CANCEL_REQUESTED
-- `LifecycleStateCompleted`: COMPLETED
-- `LifecycleStateFailed`: FAILED, TERMINATED, TIMED_OUT, CANCELED
-
-#### State Transitions (`statemachine.go`)
-
-| Transition | From | To | Trigger |
-|---|---|---|---|
-| TransitionScheduled (37-77) | UNSPECIFIED | SCHEDULED | Initial scheduling |
-| TransitionRescheduled (87-127) | STARTED | SCHEDULED | Retry after failure |
-| TransitionStarted (130-169) | SCHEDULED | STARTED | Worker accepts task |
-| TransitionCompleted (177-202) | STARTED/CANCEL_REQUESTED | COMPLETED | Worker completes |
-| TransitionFailed (210-237) | STARTED/CANCEL_REQUESTED | FAILED | Non-retryable failure |
-| TransitionCancelRequested (278-295) | STARTED/SCHEDULED | CANCEL_REQUESTED | Cancel API called |
-| TransitionCanceled (304-331) | CANCEL_REQUESTED | CANCELED | Worker acknowledges cancel |
-| TransitionTerminated (246-275) | SCHEDULED/STARTED/CANCEL_REQUESTED | TERMINATED | Terminate API called |
-| TransitionTimedOut (340-374) | SCHEDULED/STARTED/CANCEL_REQUESTED | TIMED_OUT | Timer task fires |
-
-#### Mutable State Structures
-
-**ActivityState** (proto):
-- `activity_type`, `task_queue`, timeouts (`schedule_to_close`, `schedule_to_start`, `start_to_close`, `heartbeat`), `retry_policy`, `status`, `schedule_time`, `priority`, `cancel_state`, `terminate_state`
-
-**Activity Go Component** (`activity.go:52-68`):
-- `ActivityState` (embedded proto)
-- `Visibility: chasm.Field[*chasm.Visibility]` -- search attributes
-- `LastAttempt: chasm.Field[*ActivityAttemptState]` -- attempt count, stamp, started_time, failure details, worker identity
-- `LastHeartbeat: chasm.Field[*ActivityHeartbeatState]` -- heartbeat details and recorded_time
-- `RequestData: chasm.Field[*ActivityRequestData]` -- input, header, user_metadata
-- `Outcome: chasm.Field[*ActivityOutcome]` -- successful (output) or failed (failure)
-- `Store: chasm.ParentPtr[ActivityStore]` -- parent workflow (nil for standalone)
-
-#### Task Flow
-
-1. **Scheduling** (`handler.go:51-104`): `StartActivityExecution()` → creates Activity → applies TransitionScheduled
-2. **Dispatch** (`activity_tasks.go:21-79`): `activityDispatchTaskExecutor` pushes to matching service via `AddActivityTask()`
-3. **Start** (`activity.go:173-191`): `HandleStarted()` applies TransitionStarted, schedules start-to-close and heartbeat timeout tasks
-4. **Completion** (`activity.go:259-280`): `HandleCompleted()` applies TransitionCompleted
-5. **Failure** (`activity.go:284-323`): `HandleFailed()` checks retryability → either `tryReschedule()` or TransitionFailed
-6. **Heartbeat** (`activity.go:559-586`): Updates LastHeartbeat, reschedules heartbeat timeout task
-
-#### Timeout Tasks
-
-- **ScheduleToStartTimeoutTask** (`activity_tasks.go:81-116`): Non-retryable → TIMED_OUT
-- **ScheduleToCloseTimeoutTask** (`activity_tasks.go:118-150`): Non-retryable → TIMED_OUT
-- **StartToCloseTimeoutTask** (`activity_tasks.go:152-198`): Attempts retry via `tryReschedule()`; if not retryable → TIMED_OUT
-- **HeartbeatTimeoutTask** (`activity_tasks.go:200-276`): Validates heartbeat recency; attempts retry; if not retryable → TIMED_OUT
-
-#### Retry Logic
-
-- `shouldRetry()` (`activity.go:504-514`): Checks TransitionRescheduled possible, attempt < max, enough time remaining
-- `hasEnoughTimeForRetry()` (`activity.go:518-534`): Exponential backoff calculation against schedule-to-close deadline
-- `tryReschedule()` (`activity.go:489-502`): Applies TransitionRescheduled (increments attempt, schedules dispatch with backoff)
-
-#### Cancellation
-
-- `RequestCancelActivityExecution` (`handler.go:273-296`): Applies TransitionCancelRequested
-  - If SCHEDULED: immediately applies TransitionCanceled (`activity.go:414-433`)
-  - If STARTED: stays CANCEL_REQUESTED; worker receives cancellation on next interaction
-
----
-
-### 1.2 Legacy Workflow Activities
-
-Activities executed as part of a workflow use **mutable state (ActivityInfo) plus history events**.
-
-#### History Events
-
-```
-EVENT_TYPE_ACTIVITY_TASK_SCHEDULED (10)
-EVENT_TYPE_ACTIVITY_TASK_STARTED (11)
-EVENT_TYPE_ACTIVITY_TASK_COMPLETED (12)
-EVENT_TYPE_ACTIVITY_TASK_FAILED (13)
-EVENT_TYPE_ACTIVITY_TASK_TIMED_OUT (14)
-EVENT_TYPE_ACTIVITY_TASK_CANCEL_REQUESTED (15)
-EVENT_TYPE_ACTIVITY_TASK_CANCELED (16)
-```
-
-#### ActivityInfo Mutable State (`persistence/v1/executions.proto:524-661`)
-
-Core: `activity_id`, `activity_type`, `task_queue`, `scheduled_time`, `started_time`, `started_event_id`, `scheduled_event_id`
-
-Timeouts: `schedule_to_close_timeout`, `schedule_to_start_timeout`, `start_to_close_timeout`, `heartbeat_timeout`
-
-Retry: `attempt`, `has_retry_policy`, `retry_initial_interval`, `retry_maximum_interval`, `retry_maximum_attempts`, `retry_backoff_coefficient`, `retry_expiration_time`, `retry_non_retryable_error_types`, `retry_last_failure`
-
-State flags: `cancel_requested`, `cancel_request_id`, `timer_task_status` (bit flags), `stamp`, `paused`, `pause_info`
-
-#### Pending Activity States (`activity.go:53-61`)
-
-- SCHEDULED: `StartedEventId == 0`
-- STARTED: `StartedEventId != 0 && !CancelRequested`
-- CANCEL_REQUESTED: `CancelRequested`
-- PAUSED: `Paused && Scheduled`
-- PAUSE_REQUESTED: `Paused && Started`
-
-#### Timer Task Status Flags
-
-```go
-TimerTaskStatusCreatedScheduleToStart = 1
-TimerTaskStatusCreatedScheduleToClose = 2
-TimerTaskStatusCreatedStartToClose    = 4
-TimerTaskStatusCreatedHeartbeat       = 8
-```
-
-#### Pause/Unpause/Reset (unique to legacy model)
-
-- **Pause** (`activity.go:254-284`): Sets `paused = true`, increments stamp if SCHEDULED
-- **Unpause** (`activity.go:388-425`): Clears pause, regenerates retry task if SCHEDULED
-- **Reset** (`activity.go:286-379`): Resets attempt to 1, optionally resets heartbeat/options
-
-#### API Handlers (`service/history/api/`)
-
-- `recordactivitytaskstarted/api.go`: Creates ActivityTaskStartedEvent
-- `respondactivitytaskcompleted/api.go`: Creates ActivityTaskCompletedEvent
-- `respondactivitytaskfailed/api.go`: Retry or ActivityTaskFailedEvent
-- `respondactivitytaskcanceled/api.go`: Creates ActivityTaskCanceledEvent
-- `recordactivitytaskheartbeat/api.go`: Updates heartbeat state, reschedules timeout
-
----
-
-### 1.3 Activity Metrics (Both Models)
-
-Defined in `common/metrics/metric_defs.go`. Both models emit the same metric names.
-
-**Counters:**
-| Metric | Description |
-|---|---|
-| `activity_success` | Successful completions (excludes retries) |
-| `activity_fail` | Final failures (retries exhausted) |
-| `activity_task_fail` | Per-attempt failures (includes retries) |
-| `activity_cancel` | Canceled activities |
-| `activity_terminate` | Terminated activities (CHASM only) |
-| `activity_timeout` | Terminal timeouts |
-| `activity_task_timeout` | Per-timeout events (includes retries) |
-
-**Timers:**
-| Metric | Description |
-|---|---|
-| `activity_start_to_close_latency` | StartedTime → completion/failure/timeout |
-| `activity_schedule_to_close_latency` | ScheduleTime → completion/failure/timeout/cancel |
-
-**Tags:** `namespace`, `task_queue_family`, `operation`, `activity_type`, `versioning_behavior`, `workflow_type` (set to `__temporal_standalone_activity__` for CHASM). Timeout metrics additionally tagged with `timeout_type` (SCHEDULE_TO_START, SCHEDULE_TO_CLOSE, START_TO_CLOSE, HEARTBEAT).
-
-**Metric enrichment** (`activity.go:804-824`): `enrichMetricsHandler()` adds per-task-queue-family scoping via `metrics.GetPerTaskQueueFamilyScope()`.
-
----
-
-### 1.4 Key Differences
-
-| Aspect | CHASM Standalone | Legacy Workflow |
-|---|---|---|
-| Persistence | Mutable state only | Mutable state + history events |
-| Parent context | Standalone execution | Part of workflow execution |
-| State tracking | ActivityState + sub-components | ActivityInfo in workflow |
-| Task dispatch | Direct to matching service | Via workflow task completion |
-| Completion storage | Outcome field | History events |
-| Cancellation | Explicit CANCEL_REQUESTED state | Boolean flag in ActivityInfo |
-| Pause support | Not yet implemented | Full (pause, unpause, reset) |
-| Search attributes | Visibility component (chasm) | Workflow search attributes |
-
----
-
-## Part 2: saas-temporal Cloud Integration
-
-### 2.1 Architecture Overview
-
-saas-temporal wraps the Temporal server to run in Temporal Cloud cells by replacing core persistence with Cloud Data Storage (CDS), backed by:
-- **Datastax Astra Cassandra** for durable storage
-- **Write-Ahead Logs (WALs)** for durability before Cassandra persistence
-- **OpenSearch/Elasticsearch** for workflow visibility
-- **Tiered Storage** (S3/GCS/Azure) for history archival
-
-### 2.2 Entry Point and Server Construction
-
-**Main:** `cmd/temporal-service/main.go`
-
-The `start` command:
-1. Loads OSS Temporal configuration from YAML
-2. Injects secrets (Astra, Elasticsearch credentials)
-3. Sets up dynamic configuration
-4. Optionally enables cloud metrics handler (Chronicle)
-5. Configures authorization (SaaS Auth0 JWT + Temporal JWT)
-6. Configures custom datastore with CDS
-7. Creates server via `cds.NewServer()`
-
-**Server creation:** `cds/export/cds/server.go`:
-```go
-func NewServer(serviceFxOpts FxOptions, opts ...temporal.ServerOption) (temporal.Server, error) {
-    return newServerFx(TopLevelModule, serviceFxOpts, opts...)
-}
-```
-
-Uses Uber FX dependency injection with modules for persistence factory, dynamic config, serialization, and per-service modules (history, matching, frontend, worker).
-
-### 2.3 CDS Factory Architecture (`cds/export/cds/factory.go`)
-
-**FactoryProvider** (lines 51-65): Implements `client.AbstractDataStoreFactory`
-- `NumberOfShards`, `OrderedDatastoreConfigs` (shards → datastores)
-- `HistoryDatastoreConfigs` (weighted distribution)
-- `WALFollowerProviders` for WAL followers
-- `Clock`, `DynamicConfig`, `ChasmRegistry`
-
-**Factory**: Manages three WAL pools:
-- **MS WAL** (MutableState): Records mutable state mutations
-- **HE WAL** (HistoryEvent): Records history events
-- **LP WAL** (LargePayload): Records oversized payloads
-
-Plus store providers: `MultiDBStoreProvider` for ordinal datastores, separate history store provider with tiered storage, optional Walker integration.
-
-### 2.4 Astra Cassandra Integration (`cds/storage/cassandra/astra/`)
-
-**Session creation** (`gocql.go`): Wraps gocql with Astra-specific config (TLS, connection pooling, retry policies) via Datastax `gocql-astra`.
-
-**Query instrumentation** (`gocql_metrics.go:48-100`): `queryMetricsObserver` instruments every query with 150-entry LRU statement cache.
-
-**Cassandra Metrics:**
-| Metric | Description |
-|---|---|
-| `CassandraConns` | Connection count |
-| `CassandraQueryTotalLatency` | Query latency |
-| `CassandraBatchTotalLatency` | Batch latency |
-| `CassandraQuery` | Query count |
-| `CassandraBytesTx` / `CassandraBytesRx` | Network bytes |
-| `CassandraLargeResponse` / `CassandraLargeRequest` | Large payload detection |
-| `CassandraRetries` | Retry histogram |
-| `CassandraErrors` | Error counters |
-
-Tags: `OperationType` (INSERT/UPDATE/DELETE/SELECT), `TableName`, `CasTag` (CAS operation)
-
-### 2.5 Write-Ahead Logs (`cds/export/wal/`, `cds/stream/`)
-
-WALs provide durability guarantees before data reaches Cassandra.
-
-**WAL Client Interface** (`cds/export/wal/crud.go`):
-```go
-WriteMS(), WriteHE(), WriteLP()  // Write operations per pool
-ReadMS(), ReadHE(), ReadLP()     // Read operations per pool
-```
-
-**Configuration** (`cds/config/configs.go:46-140`):
-- Rate limiting: `WALReadsRate`, `WALReadsBurst`
-- Timeouts: `WALDialTimeout`, `WALReadTimeout`, `WALWriteTimeout`
-- Ledger rotation: `WALLedgerRotationBytesThreshold`, `WALLedgerRotationAgeThreshold`
-- Retention: `WALLedgerLifetime`
-- Parallelism: `WALMaxParallelReads`
-- Feature flags: `WALReadV2Enabled`, `WALV2EncodingEnabled`
-
-**WAL Metrics** (`cds/metrics/metrics.go:34-56`):
-| Metric | Description |
-|---|---|
-| `wal_latency` | Operation latency |
-| `wal_stream_dial_attempt/success/error` | Connection establishment |
-| `wal_stream_dns_latency` | DNS resolution |
-| `wal_stream_connect_latency` | TCP connect |
-| `wal_stream_handshake_latency` | TLS handshake |
-| `wal_stream_send/receive_latency` | I/O latency |
-| `wal_health_check_failed_count` | Connection health |
-| `wal_write_timeout_count` | Timeout tracking |
-| `wal_reader_page_latency` | Page read latency |
-| `wal_entries_per_read` | Batch size histogram |
-| `wal_compression_count` | Compression events |
-
-**Flush Metrics** (lines 13-27):
-| Metric | Description |
-|---|---|
-| `flush_latency` | Time to flush to persistence |
-| `flush_error` | Flush failures |
-| `flush_snapshot_aborts` | Snapshot abort count |
-| `flush_persistence_behindness_bytes/count/time` | Persistence lag |
-| `flush_time_since_last_persist` | Staleness |
-| `flush_reason_count` | Flush trigger reasons (by namespace) |
-
-**Recovery Metrics** (lines 57-70):
-| Metric | Description |
-|---|---|
-| `recovery_total_latency` | Full recovery duration |
-| `recovery_open_reader_latency` | Snapshot reader open |
-| `recovery_rate_limiter_latency` | Rate limiting delay |
-| `recovery_first_read_latency/bytes` | Initial WAL read |
-| `recovery_takeover_latency` | Takeover phase |
-| `recovery_wal_update_latency` | WAL update during recovery |
-
-**Ledger Metrics** (lines 77-82):
-| Metric | Description |
-|---|---|
-| `ledger_rotation_count` | Rotations |
-| `logs_per_ledger` | Logs per ledger histogram |
-| `segments_per_shard` | Segments per shard histogram |
-| `segment_too_old_count` | GC candidates |
-| `active_segment_too_old_count` | Rotation delay |
-
-### 2.6 Execution Store Wrapper (`cds/export/cds/execution_store.go`)
-
-Wraps the Cassandra execution store to:
-- Convert mutable state mutations to WAL records (`NewMSWALRecord()`)
-- Convert history events to WAL records (`NewHEWALRecord()`)
-- Calculate storage metering
-- Manage snapshot trimming
-- Implement history event caching
-
-Implements `persistence.ExecutionStore` and `persistence.ShardStore`.
-
-### 2.7 How Activity State Flows Through CDS
-
-**CHASM activities**: Activity mutable state → MS WAL write → Cassandra persistence. No HE WAL involvement (no history events). State transitions are persisted as mutable state mutations via the execution store wrapper.
-
-**Legacy workflow activities**: ActivityInfo mutable state → MS WAL write → Cassandra. History events (Scheduled, Started, Completed, etc.) → HE WAL write → Cassandra. Both paths go through the execution store wrapper's WAL record conversion.
-
-### 2.8 OpenSearch/Elasticsearch Visibility (`visibility/`)
-
-**Factory:** `visibility/factory.go` -- `VisibilityStoreFactory` creates visibility stores configured per cloud cell.
-
-**Batch processor metrics** (`visibility/common/metrics_defs.go`):
-| Metric | Description |
-|---|---|
-| `visibility_batch_processor_request_add_latency` | Enqueue time |
-| `visibility_batch_processor_request_latency` | Total request latency |
-| `visibility_batch_processor_request_errors` | Failed requests |
-| `visibility_batch_processor_commit_latency` | Batch commit time |
-| `visibility_batch_processor_batch_size` | Items per batch histogram |
-| `visibility_batch_processor_batch_requests` | Requests per batch histogram |
-| `visibility_batch_processor_queued_requests` | Queue depth histogram |
-| `visibility_batch_processor_corrupted_data` | Data integrity failures |
-| `visibility_batch_processor_duplicate_request` | Deduplication events |
-
-### 2.9 Tiered Storage (`cds/persistence/tieredstorage/`)
-
-Long-term history archival to cloud object stores:
-- S3 (AWS): `s3_store.go`
-- GCS (Google Cloud): `gcs_store.go`
-- Azure Blob: `azure_client.go`
-
-Interface: `Upload()`, `Read()`, `Delete()`, `List()`, `PluginName()`
-
-Metrics: `ReadWorkflowHistory`, `UploadWorkflowHistory`, `DeleteWorkflowHistory`, `ListTieredStorageObjects`
-
-### 2.10 Persistence Store Metrics (`cds/persistence/metrics/defs.go`)
-
-**Store layer** (lines 70-85):
-| Metric | Description |
-|---|---|
-| `store_requests` | Request count by operation |
-| `store_latency` | Operation latency |
-| `store_errors` | Errors: shard_exists, shard_ownership_lost, condition_failed, timeout, unavailable |
-
-**Manager layer** (lines 89-102):
-| Metric | Description |
-|---|---|
-| `saas_persistence_requests` | High-level request count |
-| `saas_persistence_latency` | High-level latency |
-| `saas_persistence_errors` | Error tracking |
-
-Tags: `operation` (CreateShard, UpdateShard, GetWorkflowExecution, etc.), `component`, `cass_cluster`
-
-### 2.11 Cloud Metrics Infrastructure
-
-**Handler chain** (`cloudmetricshandler/delegating_recorders.go`):
-1. `allowlistedRecorder`: Filters through allowlist
-2. `multiRecorder`: Sends to multiple backends
-
-**Chronicle integration** (`cloudmetricshandler/chronicle_recorder.go`):
-- Enabled by `TEMPORAL_ENABLE_CLOUDMETRICSHANDLER`
-- Config: `/etc/temporal/cloudmetricshandler`
-- Kubernetes enrichment: pod name, namespace, labels
-- Backends: S3 writer, HTTP writer (to Chronicle service)
-- Batch config: 50K queue, 25K batch, 100ms flush
-
-**Action metering** (`actionmetering/metrics.go`):
-- `billable_action_count` with tags: namespace, action_type, workflow_type, workflow_task_queue
-- Activity type/task queue currently placeholder `"_unknown_"` with TODOs for standalone activity support
-
-### 2.12 Additional Cloud Features
-
-- **Authorization**: SaaS Auth0 JWT + Temporal JWT, TLS client certs
-- **Quotas/Flow Control** (`quotas/`, `flowcontrol/`): Request-level and task-queue quotas
-- **Multi-region replication** (`cds/service/history/replication/`): Custom replication filters
-- **Metering V3**: S3/GCS/Azure bucket metering
-- **SMS (etcd)**: Secondary Metadata Store for namespace/cluster metadata
-- **Dynamic config**: 150+ hot-reloadable properties (`cds/config/configs.go`)
-END_DOCUMENT--------------------------------------------------------------------------------------
-
-START_DOCUMENT------------------------------------------------------------------------------
-# Standalone Activity COGS and margins
-
-@Dan Davison March 17, 2026
-
-We want to ensure that we are billing in a way that meets our target margins for new product features in cloud, such as new CHASM execution types. To do this, we need to know certain things about COGS (cost of goods sold) for these features. This document outlines how to estimate COGS for Standalone Activity relative to Workflow and the implications of this for margins.
-
-# Motivation: avoiding cannibalization
-
-We have rules (see [temporalio/action](https://github.com/temporalio/action)) specifying how customer operations map to billable Actions. For example, suppose a customer executes a Workflow that executes a single Activity, which succeeds on first attempt without heartbeating. This incurs 2 Actions (StartWorkflow and ScheduleActivity). We’ll call this a “Single Activity Workflow” (SAW).
-
-We haven’t yet decided how we will bill for Standalone Activity (SAA). But suppose that we decide that executing a single SAA (no retries, no heartbeating) is 1 Action (StartStandaloneActivity).
-
-If we want SAA margins to match SAW margins, then we want the COGS of SAA (no retries, no heartbeating) to be ≤ 1/2 that of SAW (because we get half as much revenue for the SAA). If it is not, then there would be some degree of cannibalization (customers switch their single-activity workloads to SAA, but our margins there are worse). We’d hope it would be offset by increased volume, but we’d still prefer SAA margins to match SAW.
-
-### What about retries and heartbeating?
-
-SAW (no retries and no heartbeating) is 2 Actions. If the activity retries once it becomes 3 Actions (ScheduleActivity now happens twice); if it heartbeats once during each attempt it becomes 5 Actions.
-
-Let’s assume (as we currently intend) that we apply the same billing rules to Standalone Activity retries and heartbeating. Then, as long as SAA is not worse than Workflow Activity with respect to COGS of retries and heartbeating, our margins from those customer operations will be at least as good under SAA as when they are done in the context of a pre-CHASM workflow. CHASM has been designed for efficiency; we have reason to be optimistic that it’s not *worse* than the legacy workflow activity implementation.
-
-# Problem statement
-
-The above suggests that we should focus on estimating the ratio of COGS for Standalone Activity (SAA) relative to Single-activity Workflow (SAW) in the no retries, no heartbeating case:
-
-$$
-R = \frac{C_{SAA}}{C_{SAW}}.
-$$
-
-We expect $R < 1$ because SAA achieves execution of an activity with fewer RPCs, persistence operations, etc, than SAW. We are hoping that it is less than 1/2 since then our SAA margins are as good as or better than our workflow margins, assuming we bill 1 Action for SAA.
-
-# Estimating the COGS ratio
-
-We’ll assume that the COGS for a SAA or SAW execution results solely from invoices from third parties relating to cloud compute resources. COGS for an execution type (SAA or SAW) is the sum of price ($p$) times quantity consumed ($q$) over all resources:
-
-$$
-C = \sum_{i} p_i q_i.
-$$
-
-We want the COGS ratio $R$. We can write that as a weighted average of per-resource usage ratios:
-
-$$
-R = \frac{C_{SAA}}{C_{SAW}} = \sum_i f_i r_i.
-$$
-
-This allows us to calculate $R$ as a function of two things that we can estimate:
-
-- $f_i = p_i q_{i}(SAW) / \sum_j p_j q_{j}(SAW)$ is the fraction of SAW COGS attributable to resource $i$ (“spend share”). We’ll use our current cloud spend for this.
-- $r_i = q_i(SAA) / q_i(SAW)$ is the per-resource usage ratio. We will estimate these by comparing the implementations or by running experiments in cloud cells.
-
-The resources ($i$) potentially include:
-1. Data egress
-2. CPU usage
-3. Memory usage
-4. Persistence operations against our WALs
-5. Persistence operations against Astra (to be replaced by Walker)
-6. Persistence operations against OpenSearch (visibility)
-7. Metrics/logs processing and storage costs, Clickhouse
-
-*At-rest data storage is excluded: we bill customers separately for storage on a GB/h basis, so it does not need to be subsidized by Actions. (Tangentially, it’s worth noting that we expect SAA storage to cost users half what they’d pay for SAW since SAW stores the input and output payloads in both workflow scheduled/complete events and activity scheduled/complete events.)*
-
-# Per-resource usage ratios
-
-To proceed, we need to estimate the SAW vs SAA usage ratio ($r_i$) for each resource.
-
-The following table summarizes the two implementations. It describes the simplest possible happy-path scenario: an activity that succeeds on first attempt without heartbeating, via sync matches.
-
-| # | Single-activity Workflow | Standalone Activity |
-| --- | --- | --- |
-| 1 | RPC: `StartWorkflowExecution` => HEWAL, MSWAL; Vis&; Cassandra& | RPC: `StartActivityExecution` => MSWAL; Vis&; Cassandra& |
-| 2 | Task => RPC: `AddWorkflowTask` |  |
-| 3 | RPC: `RecordWorkflowTaskStarted` => HEWAL, MSWAL; Cassandra& |  |
-| 4 | RPC: `RespondWorkflowTaskCompleted` => HEWAL, MSWAL; Cassandra& |  |
-| 5 | Task => RPC: `AddActivityTask` | Task => RPC: `AddActivityTask` |
-| 6 | RPC: `RecordActivityTaskStarted` => HEWAL, MSWAL; Cassandra& | RPC: `RecordActivityTaskStarted` => MSWAL; Cassandra& |
-| 7 | RPC: `RespondActivityTaskCompleted` => HEWAL, MSWAL; Cassandra& | RPC: `RespondActivityTaskCompleted` => MSWAL; Vis&; Cassandra& |
-| 8 | Task => RPC: `AddWorkflowTask` |  |
-| 9 | RPC: `RecordWorkflowTaskStarted` => HEWAL, MSWAL; Cassandra& |  |
-| 10 | RPC: `RespondWorkflowTaskCompleted` => HEWAL, MSWAL; Vis&; Cassandra& |  |
-- `&` indicates a write that’s not on the sync response path
-- `AddWorkflowTask` and `AddActivityTask` involve inter-service RPCs but no persistence writes in the happy path (“sync match”).
-- The table does not show worker poll requests
-- An additional `Vis&` is incurred in both cases when the execution is deleted.
-
-Comparing the implementations in the table gives
-
-$$
-r_{\text{WAL}} = \frac{3}{14} = 0.21,~~~~
-r_{\text{Cass}} = \frac{3}{7} = 0.43,~~~~
-r_{\text{Vis}} = \frac{3}{3} = 1.0.~~~~
-$$
-
-These ratios count writes only. Cassandra reads are not expected to differ much between SAW and SAA since they use similar caching mechanics, with the result that a high proportion of both SAW and SAA executions incur ~1 read (on execution creation).
-
-In addition, we can estimate data transfer costs by comparing the implementations. These are likely dominated by egress to customer infra (ingress is free on AWS and GCP; data transfers to Astra, OpenSearch, and Grafana are in-VPC or via PrivateLink). Let the activity input and output payload sizes be $S_I$ and $S_O$. Payload egress for SAW is $2S_I + 2S_O$ (input payload sent to workflow and activity workers; output payload sent to workflow worker and client). For SAA this is $S_I + S_O$ since there is no workflow worker detour. This gives
-
-$$
-r_\text{data\_transfer} = 0.5.
-$$
-
-# COGS ratio estimate
-
-Using approximate/preliminary cloud spend share numbers (thanks @Stephen Chan ) we have:
-
-| **Resource** | **Spend share $f_i$ (preliminary)** | **Usage ratio $r_i$** | **Notes** |
-| --- | --- | --- | --- |
-| **Astra writes** | 40% | $\frac{3}{7}$ = 0.43 | SAW does 2 additional writes for each WFT |
-| **Visibility** (OpenSearch) | 20% | $\frac{3}{3}$ = 1.00 | Equal — both SAA and SAW produce exactly ~~2~~ 3 visibility updates |
-| **WAL writes** | 10% | $\frac{3}{14}$ = 0.21 | Half of Astra ratio: SAA writes only to MSWAL, whereas SAW writes to both HEWAL and MSWAL |
-| **EC2 compute** | 10% | ? | Would need cloud cell experiment |
-| **Data transfer** | 10% | $\frac{1}{2}$ = 0.50 | SAW sends payloads via workflow worker round-trip; SAA does not |
-| **Overheads** (incl. Clickhouse) | 10% | ? |  |
-
-This gives the following estimate of the COGS ratio:
-
-$$
-\begin{align*}
-R &=
-\underbrace{0.4 \times 0.43}_{\text{Astra}:~0.17} +
-\underbrace{0.2 \times 1.0}_{\text{Vis}:~0.20} +
-\underbrace{0.1 \times 0.21}_{\text{WAL}:~0.02} +
-\underbrace{0.1 \times 0.50}_{\text{Tx}:~0.05} +
-0.1 \cdot r_\text{compute} + 0.1 \cdot r_\text{overhead} \\\\
-&=
-0.44 + 0.1(r_\text{compute} + r_\text{overhead}).
-\end{align*}
-$$
-
-# Sensitivity analysis
-
-Before thinking about the implications of this for billing and margins, the next steps are:
-
-1. Refine the cloud spend estimates (Cloud Capacity team; does not involve load experiments)
-2. Decide whether we want to do load experiments to estimate $r_\text{compute}$
-3. Decide how we will address $r_\text{overhead}$
-
-For (2) and (3) we can do some initial sensitivity analysis:
-
-SAW does 10 RPCs vs SAA’s 4 (with 7 vs 3 of them doing persistence writes in the sync-match case). If services are CPU-bound then this suggests that $0.4 < r_\text{compute} < 1.0$ might be reasonable.
-
-The other overheads include (per @Stephen Chan ) Clickhouse, observability cells, and Envoy proxies. Since these costs should also scale with RPC count, let’s assume the same bounds: $0.4 < r_\text{overhead} < 1.0$. This gives:
-
-$$
-0.52 \leq R \leq 0.64.
-$$
-
-![image.png](.task/sensitivity.png)
-
-For example, if SAW margins were 70%, SAA margins would be 62% - 69%. This margin reduction would affect at maximum the ~3% of workflows that are SAW.
-
-- COGS ratio to margins conversion formula
-
-     $\text{margin}_{\text{SAA}} = 1 - 2R(1 - \text{margin}_{\text{SAW}})$.
-
-
-# Discussion
-
-- **Visibility limits SAA margins**. Visibility is expensive (20%), but SAA and SAW perform the same number of visibility writes, so it combines a large weight with the worst possible ratio.
-- **(Unfavorable) Over-provisioning would push $R$ up.** The usage ratios above for persistence are derived from write counts, which only translate to cost savings if capacity tracks usage. But e.g. Astra is bought in fixed hardware units (“Astra Classic”). If any resource component is over-provisioned then SAA and SAW would pay the same cost per execution and $r_i \to 1.0$, making SAA margins less attractive relative to workflow.
-- **Cloud spend share**. We could attempt to separate fixed costs and renormalize (see [Next steps](https://www.notion.so/Next-steps-3268fc567738805e82ddd9c1e1d4c9d1?pvs=21)). This would be favorable to SAA margins if it decreases the visibility share, but unfavorable if it decreases Astra share.
-
-    We’re estimating $f_i$ from cloud spend, so we’re assuming that the spend distribution for single-activity workflows would be similar to the spend distribution for the real mix of customer workflows. I suspect this is a reasonable modeling assumption since in both cases the application is performing the same state transitions in response to workflow and activity task processing.
-
-- **(Mixed) Effect of migration to Walker**. Walker replaces Astra with storage that is under our own control, making right-sizing easier. This may mean that the 3/7 write ratio is more fully realized under Walker, moving SAA COGS away from SAW. However, Walker will be cheaper than Astra, so persistence’s share of spend shrinks. Since persistence is where SAA has its largest advantage, this would bring SAA COGS closer to SAW.
-
-    These two effects act in opposite directions and the net result will depend on their relative magnitudes. This suggests that we should monitor COGS calculations as the Walker migration proceeds.
-
-- **(Future) A visibility backend migration would improve SAA margins.** There has been [movement](https://www.notion.so/Visibility-CDS-2a98fc567738807e9ee0f318edc4c16f?pvs=21) toward replacing OpenSearch. As discussed above, any reduction in visibility spend share would make SAA COGS more attractive relative to workflow.
-
-# Conclusion
-
-- [We are planning to bill SAA at 1/2 the price of SAW](https://www.notion.so/PRD-Standalone-Activities-for-durable-job-processing-1ee8fc567738806d8b6fe8e2eeae0fc4?pvs=21). Although there are various assumptions involved, at this point it looks like SAA COGS will be more than 1/2 SAW COGS: the estimated range above is $0.52 \leq R \leq 0.64$. This implies that some degree of cannibalization is likely. The extent of cannibalization would be bounded by the proportion of current workloads that are SAW, which is 3% per @Phil Prasek. It may be offset by volume growth attributable to SAA.
-
-# Next steps
-
-- **Refine cloud spend share estimates.**
-
-    The cloud spend share weights used in this analysis are supposed to be marginal costs. We could attempt to separate marginal vs fixed costs and renormalize our spend share percentages. This would be favorable to SAA margins if it decreases the visibility share, but unfavorable if it decreases Astra share.
-
-- **Investigate any impact of over-provisioning.**
-
-    SAA margins may be less favorable than the calculations suggest if some resources are over-provisioned. See discussion [above](https://www.notion.so/Standalone-Activity-COGS-and-margins-3268fc567738803cb63fd9397ffd351c?pvs=21).
-
-- **Decide whether to do cloud cell experiments**.
-
-    Unlike the other resource categories, we lack any obvious theoretical basis for estimating  $r_\text{compute}$ and $r_\text{overhead}$. Estimating $r_\text{compute}$ via cloud cell experiments would require perhaps one engineer-week.  If this were to show a value close to 0.4 then it would suggest that the upper bound on $R$ is 0.56, as opposed to the current 0.64. This would however still be subject to all the assumptions discussed above. We could also attempt to tighten our estimated bounds on $r_\text{overhead}$ via experiment.
-
-    If we decide to do this, the $r_\text{compute}$ experiment would be something like the following: choose a reference activity (e.g. sleeps for 10s, no heartbeating, never fails) and run SAA and SAW workloads on a cloud cell at a fixed start rate (e.g. 10/s) for a sustained period (e.g. 1hr). Fixing start rate rather than concurrency naturally controls for end-to-end latency differences between SAA and SAW.  $r_\text{cpu}$ and $r_\text{memory}$ can then be estimated from metrics as the ratio of mean utilization above the idle baseline. The analysis will need to decide how to combine them, e.g. based on which is more often limiting; alternatively, using the larger of the two would yield a conservative calculation.
-END_DOCUMENT------------------------------------------------------------------------------
-
-START_DOCUMENT------------------------------------------------------------------------------
-# Test plan for SAA COGS measurement
-
-@Dan Davison March 19, 2026
-
-The [SAA COGS proposal](.task/saa-cogs.md) made an initial estimate of the SAA/SAW COGS ratio based on estimating persistence, visibility, and data transfer usage ratios directly from the implementation. But for compute and overheads we have no analytical estimate. We plan to run an experiment to:
-
-1. Estimate the missing $r_\text{compute}$.
-2. Validate the analytical $r_i$ against observed metrics
-
-For comparison, the Fairness COGS experiment docs:
-
-- [Test plan](https://www.notion.so/temporalio/Test-plan-for-COGS-measurement-28c8fc56773880169cdcc4087a98ceaf)
-- [Fairness COGS Impact](https://www.notion.so/temporalio/Fairness-COGS-Impact-2c58fc567738808f806cfbf09b771b2c)
-- [Pricing Council doc](https://www.notion.so/temporalio/WIP-Pricing-Council-Fairness-COGS-Impact-2cc8fc56773880dcb3efe435623edd9a)
-
-
-
-
-# Proposed SAA experiment
-
-
-## Workloads
-
-Two workloads, run sequentially on the same cell:
-
-1. **SAW**: execute workflow with one activity (no heartbeat, no retry).
-2. **SAA**: execute standalone activity (no heartbeat, no retry).
-
-## Parameters
-
-**Start rate.** I think that we should fix start rate rather than concurrency, since this naturally controls for end-to-end latency differences between SAA and SAW (i.e. a cell running SAW will see higher load because the concurrency will be higher because the SAW end-to-end latency is higher). The fairness experiment used 4k tasks/s. Is starting 4k executions/s reasonable for us?
-
-**Activity.** Immediate successful return; no heartbeat, no retry. We could compare with a 1s sleep to see if result differ?
-
-**Sync match.** Do one run such that sync match should be 100%, and another tuned such that sync match is lower? Verify sync match from metrics (`syncmatch_latency`, `asyncmatch_latency`)
-
-**Duration and repetitions.** Steady-state load; we need long enough for stable CPU averages. The
-fairness experiment used 6h per scenario but this was maybe because of their more sophisticated
-sinusoidal load design? 1h more than enough for the SAA experiment? ≥2 runs per workload to check
-variance/reproducibility.
-
-## Infrastructure
-
-- Anything special about test cell sizing?
-- Workers should run outside the cell (how did fairness experiment do this?)
-
-## Metrics
-
-Initial dashboard content https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs:
-
-
-- **CPU per service** (frontend, history, matching). `node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate` — a k8s recording rule over cAdvisor container metrics (defined in saas-components prometheus rules).
-- **Memory per service**. `container_memory_working_set_bytes` — also k8s/cAdvisor (defined in saas-components alert rules).
-- **RPC rate by method**, one panel per service (frontend, history, matching). `service_requests` counter ([temporal:common/metrics/metric_defs.go:615](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)), tagged with `operation` (the RPC method name). Recorded by a gRPC server-side interceptor ([telemetry.go:177](https://github.com/temporalio/temporal/blob/main/common/rpc/interceptor/telemetry.go)), so it captures inter-service RPCs (e.g. history→matching `AddActivityTask`).
-- **Astra writes by table**. `cassandra_query` counter with `verb!="select"`, plus `cassandra_batch` counter, both broken down by `table`. Tags include `operation`, `table`, `verb`, `cas` ([saas-temporal:cds/metrics/metrics.go:233,238](https://github.com/temporalio/saas-temporal/blob/main/cds/metrics/metrics.go)).
-- **Astra reads by table**. `cassandra_query` with `verb="select"`, broken down by `table`.
-- **WAL operation rate by type**. `wal_latency_count` ([saas-temporal:cds/metrics/metrics.go:35](https://github.com/temporalio/saas-temporal/blob/main/cds/metrics/metrics.go)) broken down by `walType` label (values: `MUTABLE_STATE_WAL`, `HISTORY_EVENT_WAL`, `LARGE_PAYLOAD_WAL` — see [saas-temporal:cds/common/tag/tag.go:11-24](https://github.com/temporalio/saas-temporal/blob/main/cds/common/tag/tag.go)). Note: this metric covers both reads and writes; there is no separate write-only WAL metric. This is arguably more relevant to COGS since WAL reads also cost something.
-- **Visibility persistence rate by operation**. `visibility_persistence_requests` counter ([temporal:common/metrics/metric_defs.go:1398](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)), tagged with `operation` (values include `RecordWorkflowExecutionStarted`, `RecordWorkflowExecutionClosed`, `UpsertWorkflowExecution`, `DeleteWorkflowExecution` — see [visiblity_manager_metrics.go](https://github.com/temporalio/temporal/blob/main/common/persistence/visibility/visiblity_manager_metrics.go)).
-- **Sync vs async match rate**. `syncmatch_latency_count` and `asyncmatch_latency_count` ([temporal:common/metrics/metric_defs.go:1119-1120](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)).
-
-
-## Load generator (omes)
-
-- Add a new scenario that starts standalone activities directly from the load generator, not from within a workflow.
-- Build the omes Go worker Docker image and deploy it as a pod on k8s, configured to poll the test cell. Do we have implementation we can borrow from the fairness experiment?
-
-
-
-
-<details>
-<summary>Appendix: Comparison with fairness experiment (see commits by David Reiss)</summary>
-
-| | Fairness | SAA |
-|---|---|---|
-| **Treatments** | Same workload, two matcher modes | Two execution types (SAW vs SAA) |
-| **Quantity computed** | $\Delta C / C$ | Ratio $r_i = q_i(\text{SAA}) / q_i(\text{SAW})$ |
-| **Load shape** | Sinusoidal backlog (exercises matcher) | Steady-state at fixed start rate (our model assumes sync match) |
-| **What is measured** | CPU per service, Astra operation rates | CPU per service, memory per service, Astra operation rates by table and verb, WAL write rates, visibility write rates, RPC handling rates per service per method |
-| **Predictions to validate** | None — purely empirical | $r_\text{Cass} = 3/7$, $r_\text{WAL} = 3/14$, $r_\text{Vis} = 3/3$, per-method RPC rates matching proposal table |
-
-Fixed start rate (not fixed task throughput) because SAA and SAW generate different numbers of tasks per execution.
-
-**Question**: what is the incremental COGS of enabling the fairness matcher vs the classic matcher?
-
-**COGS components**: (1) Astra queries (~35% of total COGS), (2) EC2 compute (~9%, split across frontend+matching and history). Ignored: data transfer, Astra storage, non-AWS costs (Clickhouse <3%).
-
-**Setup**: dedicated test cell `s-oss-dnr-faircogs3` (64 partitions). Load generator: Omes Ebb and Flow — sinusoidal activity task backlog. 5 scenarios (classic, fairness with 0/1k/100k keys, priority), each 6 hours. Measured via [dedicated Grafana dashboard](https://grafana.tmprl-internal.cloud/d/df6pldpkiy1vka/faircogs).
-
-**Results**: Astra showed no significant increase. CPU increased up to 23% (frontend) and 36% (history) in the worst case (1k fairness keys). COGS impact: $(0.035 \times 0.23) + (0.057 \times 0.36) = 2.8\%$. Pricing council recommendation: price fairness on value to customer, not COGS.
-
-
-
-
-
-</details>
-
-<details>
-<summary>Appendix: possible experimental outcomes</summary>
-
-- **Analytical predictions confirmed, $R$ in predicted range.** Observed $r_\text{Cass}$, $r_\text{WAL}$, $r_\text{Vis}$, and per-method RPC rates match the analytical derivations. $r_\text{compute}$ lands in $[0.4, 1.0]$, giving $R$ in roughly $0.52$–$0.64$. We present $R$ with a tighter confidence interval than the proposal (because $r_\text{compute}$ is now estimated, not bounded).
-- **$r_\text{compute}$ is low, pushing $R$ toward 0.5.** If $r_\text{compute} \approx 0.4$ and analytical predictions hold, $R \approx 0.52$. Cannibalization is near-zero.
-- **Observed $r_i$ diverge from analytical predictions.** Some assumption is wrong (e.g. sync match doesn't hold at test load, or there are unaccounted persistence writes). We recompute $R$ using observed values and identify which assumption failed and whether it reflects production conditions or a test artifact.
-- **$R$ is higher than predicted.** $R > 0.64$ would mean worse cannibalization than estimated. Options: accept the margin reduction (bounded by ~3% SAW share), adjust billing, or identify engineering work to reduce SAA COGS.
-
-</details>
-
-END_DOCUMENT------------------------------------------------------------------------------
-
-
-Your task is to help me design and build the omes-based tooling that we will use to perform the experiments outlined above to learn about COGS of SAA an SAW. We are in the omes repo; study it carefully. Our work will broadly break into the following phases that we must design holistically:
-
-(1) Add any missing omes functionality that will be needed in order to be able to use omes to generate the SAA and SAW load for the experiments.
-(2) Run the experiments against the cloud cell that Stephen has prepared: its name is s-saa-cogs.
-
-Stephen linked to the 'scaffold' run that created the cell. I see it had the following input:
-
-{
-  "CellConfig": {
-    "Identity": {
-      "Location": {
-        "CloudProvider": "aws",
-        "AccountID": "124355634071",
-        "Region": "us-west-2"
-      },
-      "ID": "s-saa-cogs"
-    },
-    "Template": "v5-aws-dev",
-    "ServerVersion": "v3.151.9_oss1.31.0_151.6",
-    "AgentVersion": "v3.151.9_oss1.31.0_151.6",
-    "WebVersion": "v2.47.0",
-    "GoCanaryVersion": "v1.35.0",
-    "ComponentVersion": "v2026-03-20.00",
-    "WalVersion": "v10.0.3",
-    "EnableMetering": false
-  },
-  "FailurePolicy": 1
-}
-
-and output:
-
-{
-  "Cell": {
-    "Identity": {
-      "Location": {
-        "CloudProvider": "aws",
-        "AccountID": "124355634071",
-        "Region": "us-west-2"
-      },
-      "ID": "s-saa-cogs"
-    }
-  }
-}
-
-I am not familiar with performing operations against cloud cells, so you will need to resarch and help me during this. But we have several good resources: study the contents of the 'oncall' and 'runbooks' repos, and also use the /agent-slack skill. You also have Notion and Temporal Docs MCP. Use the more modern 'ct' rather than its alias 'omni'.
-
-Initial grafana dashboard JSON is at .task/saacogs.json.
-
-Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase in the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the outut. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
-
-In the omes work, we must use the latest version of the SDK with Standalone Activity support, such that our code is consistent with what Temporal Docs (use MCP) and the samples-go repo show.
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 120000
index 81744092..00000000
--- a/AGENTS.md
+++ /dev/null
@@ -1 +0,0 @@
-.task/AGENTS.md
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 120000
index 81744092..00000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1 +0,0 @@
-.task/AGENTS.md
\ No newline at end of file

From 7aadcec41d0fe43af51529d61fb6c2dfc773c8a7 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Mon, 23 Mar 2026 21:36:21 -0400
Subject: [PATCH 26/40] commands

---
 commands.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/commands.sh b/commands.sh
index c0e53038..272d707a 100644
--- a/commands.sh
+++ b/commands.sh
@@ -48,6 +48,16 @@ export TEMPORAL_TLS_DISABLE_HOST_VERIFICATION=true
 ct admintools --context s-saa-cogs -- temporal operator search-attribute create \
   --namespace saa-cogs-4.temporal-dev --name OmesExecutionID --type Keyword
 
+# Dynamic config
+# https://staging.thundergun.io/support/namespaces/saa-cogs-4.temporal-dev
+# {
+#   "activity.enableStandalone": true,
+#   "history.enableChasm": true
+# }
+
+# scale canary to 0
+ct kubectl --context s-saa-cogs patch deployment/temporal-go-canary -n temporal -p '{"spec":{"replicas":0}}'
+
 # SAW
 go run ./cmd run-scenario-with-worker \
   --scenario workflow_with_single_activity \

From 139bdea8e6d97dbf33131897be8f6b24e0874bf1 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Mon, 23 Mar 2026 21:36:29 -0400
Subject: [PATCH 27/40] Revert "cleanup"

This reverts commit 312bbe33950a1cd33899015a4d79f3037e29fe83.
---
 .task/AGENTS.md    |   3 +
 .task/plan.md      | 110 +++++++
 .task/research.md  | 159 ++++++++++
 .task/saacogs.json | 703 +++++++++++++++++++++++++++++++++++++++++
 .task/task.md      | 764 +++++++++++++++++++++++++++++++++++++++++++++
 AGENTS.md          |   1 +
 CLAUDE.md          |   1 +
 7 files changed, 1741 insertions(+)
 create mode 100644 .task/AGENTS.md
 create mode 100644 .task/plan.md
 create mode 100644 .task/research.md
 create mode 100644 .task/saacogs.json
 create mode 100644 .task/task.md
 create mode 120000 AGENTS.md
 create mode 120000 CLAUDE.md

diff --git a/.task/AGENTS.md b/.task/AGENTS.md
new file mode 100644
index 00000000..ff9f7032
--- /dev/null
+++ b/.task/AGENTS.md
@@ -0,0 +1,3 @@
+At the start of the conversation output the following so that I know you've read these instructions:
+
+📖 omes:saa-cogs
diff --git a/.task/plan.md b/.task/plan.md
new file mode 100644
index 00000000..acc2e53d
--- /dev/null
+++ b/.task/plan.md
@@ -0,0 +1,110 @@
+# Implementation Plan: SAA Load Generation
+
+## Goal
+
+Create two omes scenarios to generate SAW and SAA workloads against cloud cell `s-saa-cogs`, then
+observe metrics on the Grafana dashboard.
+
+## Design
+
+### Scenarios
+
+Both scenarios use `GenericExecutor` with a simple `Execute` function. This keeps the
+implementations symmetric — the only difference is what each iteration does, which is exactly the
+variable under test.
+
+**`workflow_with_single_activity`** — Each iteration calls `client.ExecuteWorkflow` with a dedicated
+minimal workflow that executes one `payload` activity and returns. Then `handle.Get()`.
+
+**`standalone_activity`** — Each iteration calls `client.ExecuteActivity` with the same `payload`
+activity. Then `handle.Get()`. No workflow involved.
+
+Both use the same task queue (derived from run-id) and the same Go worker.
+
+### Worker code
+
+Reuse the existing `payload` activity at [kitchen_sink.go:511-516](workers/go/kitchensink/kitchen_sink.go#L511-L516),
+already registered as `"payload"` at [worker.go:105](workers/go/worker/worker.go#L105).
+
+Add one new workflow: a minimal function that executes the `payload` activity with its input and
+returns the result. Register it on the existing Go worker at [worker.go:102](workers/go/worker/worker.go#L102)
+alongside the existing registrations. No new worker binary needed.
+
+### Activity configuration
+
+Both scenarios: `inputData []byte` (256 bytes), `bytesToReturn int32` (256). No heartbeat.
+`MaximumAttempts: 1` (no retries). `ScheduleToCloseTimeout: 60s`.
+
+### SDK version
+
+`go.temporal.io/sdk v1.40.0` already includes `client.ExecuteActivity`. No upgrade needed.
+
+## Implementation steps
+
+IMPORTANT: Rather than doing the implementation yourself, please "teach" the user to do the
+implementation themselves. Take a "painting by numbers" approach: Decide on the first component they
+should write, and insert a comment in the code indicating what they should do. Then pause and give
+them clickable links to the comment, and to any existing prior art in the codebase they might want
+to refer to. Don't output code directly to them. Work with them to complete the stage; review their
+work carefully. Do not consider the stage complete until the work is done to an equal or greater
+standard than you yourself would have achieved. When that stage is completed by them, or with
+further assistance from you, move on to the next component to be implemented and repeat this
+procedure.
+
+Regarding names: we will not use "cogs" anywhere in omes code itself. Conceptually, the omes code is
+defining SAW and SAA workloads. What those are used for (to run an experiment) and why (COGS
+investigation) is not the concern of the omes code.
+
+### Step 1: Add workflow to worker
+
+Add a small file under `workers/go/` with the minimal workflow function. Register it in
+[worker.go](workers/go/worker/worker.go) alongside existing registrations.
+
+### Step 2: Create `scenarios/workflow_with_single_activity.go`
+
+`GenericExecutor` whose `Execute` function:
+1. Calls `run.Client.ExecuteWorkflow()` starting the new workflow with the payload input.
+2. Calls `handle.Get()` to wait for result.
+
+### Step 3: Create `scenarios/standalone_activity.go`
+
+`GenericExecutor` whose `Execute` function:
+1. Calls `run.Client.ExecuteActivity()` with `StartActivityOptions` (ID derived from
+   run/execution/iteration, task queue from `run.TaskQueue()`, same timeout and retry policy).
+2. Passes activity type `"payload"` by name with `[]byte` (256 zeros) and `int32(256)`.
+3. Calls `handle.Get()` to wait for result.
+
+### Step 4: Create `commands.sh`
+
+Useful shell commands with terse comments for:
+- Local testing with `--embedded-server`
+- Cloud cell verification via `ct`
+- Running scenarios against `s-saa-cogs`
+
+### Step 5: Test locally
+
+- `go build ./...` and `go vet ./...`
+- `go run ./cmd list-scenarios` shows both new scenarios
+- SAW: `go run ./cmd run-scenario-with-worker --scenario workflow_with_single_activity --language go --iterations 5 --embedded-server`
+- SAA: same command with `standalone_activity` — will get "Standalone activity is disabled" from the
+  embedded dev server (v1.30.1 doesn't have the feature flag), confirming the code path reaches
+  `StartActivityExecution`. Will succeed on the cloud cell.
+
+### Step 6: Connect to cloud cell
+
+1. Verify cell: `ct kubectl --context s-saa-cogs get pods -n temporal`
+2. Check namespace: `ct admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e`
+3. Obtain operator TLS certs (from k8s secrets via `ct`, or ask Stephen)
+4. Point Grafana dashboard at `s-saa-cogs`, observe idle state
+5. Run worker + SAW scenario against the cell, observe activity in dashboard
+6. Run worker + SAA scenario, observe activity
+
+## Verification
+
+1. **Build**: `go build ./...` succeeds.
+2. **Lint/vet**: `go vet ./...` clean on our files.
+3. **List scenarios**: `go run ./cmd list-scenarios` includes both names.
+4. **Local test — SAW**: `run-scenario-with-worker --embedded-server --iterations 5` completes.
+5. **Local test — SAA**: Same command hits `StartActivityExecution` on the server (expected to fail
+   on dev server with "disabled" error; succeeds on cloud cell with CHASM enabled).
+6. **Cloud cell proof-of-concept**: Dashboard shows idle -> run scenario -> dashboard shows activity.
diff --git a/.task/research.md b/.task/research.md
new file mode 100644
index 00000000..cab82543
--- /dev/null
+++ b/.task/research.md
@@ -0,0 +1,159 @@
+# SAA COGS Experiment: Research & Design
+
+## 1. Current State of Omes
+
+### Architecture
+Omes is a load generation framework for Temporal. Scenarios are Go files in `scenarios/` that
+register via `init()` → `loadgen.MustRegisterScenario()`. The scenario name comes from the
+filename. Execution flows:
+
+1. `run-scenario` command: dials Temporal, runs scenario executor
+2. `run-worker` command: starts a worker (Go/Python/etc) polling a task queue
+3. `run-scenario-with-worker`: runs both together (local development)
+
+### Executor Types
+- `GenericExecutor`: takes a `func(ctx, *Run) error` — most flexible
+- `KitchenSinkExecutor`: wraps `GenericExecutor`, starts kitchen-sink workflows with configurable action sequences
+- `FuzzExecutor`: random action generation
+
+### Existing Standalone Activity Support
+Branch `standalone-activity` (commit `efbbb7f`) adds SAA to the `throughput_stress` scenario as
+an *optional extra activity within a workflow*. The implementation:
+
+1. Proto: `StandaloneActivity` message in `kitchen_sink.proto`
+2. Helper: `StandaloneActivity()` in `loadgen/kitchensink/helpers.go` creates an action
+3. Worker: `ExecuteStandaloneActivity()` in `workers/go/kitchensink/kitchen_sink.go` — called as a
+   *workflow activity* that internally calls `StartActivityExecution` + `PollActivityExecution`
+4. Scenario: enabled via `--option enable-standalone-activity=true`
+
+**Critical observation**: This existing support executes SAA *from within a workflow activity*.
+That is useful for testing SAA functionality but **not** for the COGS experiment. For COGS, we need
+to run SAA directly from the load generator (no workflow involved) so that the only server-side
+work is the standalone activity execution itself.
+
+## 2. What We Need for the COGS Experiment
+
+### Two New Scenarios
+
+**`saa_cogs_saw`** — Single Activity Workflow (the baseline):
+- Each iteration: start a workflow that executes one activity (payload: 256B in, 256B out), then completes
+- This is very close to `workflow_with_single_noop_activity` but with a payload activity
+
+**`saa_cogs_saa`** — Standalone Activity:
+- Each iteration: call `StartActivityExecution` directly from the load generator, then
+  `PollActivityExecution` to wait for the result
+- No workflow involved
+- Same activity (payload: 256B in, 256B out) and task queue
+- **Requires a `GenericExecutor`** since `KitchenSinkExecutor` always starts workflows
+
+Both scenarios must use the same worker (the Go worker with `payload` activity registered).
+
+### Key Design Decisions
+
+1. **Activity type**: `payload` with 256B input, 256B output (matching the COGS analysis)
+2. **No heartbeat, no retry** (matching the COGS analysis; retry max_attempts=1)
+3. **Fixed start rate** (not fixed concurrency) — controls for latency differences
+4. **Same task queue** for both scenarios — ensures same worker setup
+5. **Sync match preferred** — the COGS analysis assumes sync match; verify via metrics
+
+### SAA Load Generator Implementation
+
+The SAA scenario needs to call gRPC APIs directly. Looking at the existing
+`ExecuteStandaloneActivity` in the worker code (`workers/go/kitchensink/kitchen_sink.go:46-120`),
+we have a working reference. The scenario version should:
+
+1. Use `client.WorkflowService()` to get the gRPC client
+2. Call `StartActivityExecution` with the activity config
+3. Call `PollActivityExecution` to wait for completion
+4. This is a `GenericExecutor` with a custom `Execute` function
+
+## 3. Cloud Cell Operations
+
+### Connecting to a Cloud Cell
+
+From `bench-go.mdx`, the namespace format for test cells is `{cellId}-marathon.e2e` and the host
+is `{cellId}-marathon.e2e.tmprl-test.cloud:7233`. For our cell `s-saa-cogs`:
+- Namespace: `s-saa-cogs-marathon.e2e` (to be confirmed — Stephen may have set up differently)
+- Host: `s-saa-cogs-marathon.e2e.tmprl-test.cloud:7233`
+
+Omes connects via:
+```
+--server-address <host:port> --namespace <ns> --tls --tls-cert-path <cert> --tls-key-path <key>
+```
+
+Or with API key auth:
+```
+--server-address <host:port> --namespace <ns> --tls --auth-header "Bearer <api-key>"
+```
+
+### Running omes against a cloud cell
+
+Two options:
+1. **Local**: Run `go run ./cmd run-scenario` and `go run ./cmd run-worker` locally, connecting to
+   the cloud cell via TLS. Simplest for proof-of-concept. Higher latency (network round trip to
+   cloud) but the load generator itself isn't on the critical path for COGS measurement.
+2. **K8s pod**: Deploy omes worker as a pod on the cell's k8s cluster. Lower latency, more
+   realistic. The bench-go runbook shows this is the standard approach. Uses `omni scaffold` with
+   `--benchgo-enabled` or manual deployment.
+
+For initial proof-of-concept: run locally. For the actual experiment: deploy to k8s.
+
+### Grafana Dashboard
+
+The dashboard at `https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs` uses a `$cluster`
+variable. Set `cluster=s-saa-cogs` to point at our cell.
+
+### Cell Setup Verification
+
+Use `ct` / `omni` to verify cell state:
+```sh
+# Check cell status
+ct kubectl --context s-saa-cogs get pods -n temporal
+
+# Check namespace exists
+omni admintools --context s-saa-cogs -- temporal operator namespace describe s-saa-cogs-marathon.e2e
+```
+
+### Search Attributes
+
+Cloud cells cannot register search attributes via the SDK — they must be registered via the
+control plane. The `--do-not-register-search-attributes` flag exists for this. We should use it,
+and register `OmesExecutionID` separately if needed. For the simple COGS scenarios, we may not
+even need search attributes.
+
+## 4. Implementation Plan
+
+### Phase 1: Minimal Scenarios (omes code changes)
+
+1. Create `scenarios/saa_cogs_saw.go` — SAW scenario using `KitchenSinkExecutor`
+2. Create `scenarios/saa_cogs_saa.go` — SAA scenario using `GenericExecutor` with direct gRPC calls
+3. Both share config: payload size, start rate, duration
+
+### Phase 2: Local Proof-of-Concept
+
+1. Test both scenarios against local Temporal server
+2. Run `go run ./cmd run-scenario-with-worker` for SAW
+3. For SAA: run worker separately, then scenario (since SAA doesn't use workflows but the
+   worker still needs to poll for activity tasks)
+
+### Phase 3: Cloud Cell Connection
+
+1. Obtain credentials for s-saa-cogs cell
+2. Verify dashboard shows idle state
+3. Run a single SAW iteration and observe metrics
+4. Run a single SAA iteration and observe metrics
+
+### Phase 4: Full Experiment
+
+1. Deploy omes worker to cloud cell k8s
+2. Run SAW at target start rate for target duration
+3. Wait for cool-down, collect metrics
+4. Run SAA at same start rate for same duration
+5. Collect and compare metrics
+
+## 5. Open Questions
+
+- What namespace(s) are configured on s-saa-cogs?
+- How do we obtain TLS certs or API keys for the cell? (Check oncall or runbooks repos or search slack)
+- Does the cell have CHASM standalone activities enabled? (Dynamic config flag)
+- Worker deployment: should we use the existing bench-go infrastructure or deploy omes directly?
diff --git a/.task/saacogs.json b/.task/saacogs.json
new file mode 100644
index 00000000..bb30e263
--- /dev/null
+++ b/.task/saacogs.json
@@ -0,0 +1,703 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "links": [],
+  "panels": [
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
+      "id": 1,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"frontend-.*\"})",
+          "legendFormat": "frontend",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"})",
+          "legendFormat": "history",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"matching-.*\"})",
+          "legendFormat": "matching",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "CPU per service (vCPU)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          },
+          "unit": "decbytes"
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
+      "id": 2,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"frontend\",workload_type=\"deployment\"}))",
+          "legendFormat": "frontend",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "legendFormat": "history",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"matching\",workload_type=\"deployment\"}))",
+          "legendFormat": "matching",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "Memory per service (p50 working set)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 8 },
+      "id": 3,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"frontend\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Frontend RPC by method (req/s)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 8 },
+      "id": 4,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "History RPC by method (req/s)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 8 },
+      "id": 5,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"matching\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Matching RPC by method (req/s)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
+      "id": 6,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (table)(rate(cassandra_query{cluster=\"$cluster\",verb!=\"select\"}[$__rate_interval]))",
+          "legendFormat": "query: {{table}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (table)(rate(cassandra_batch{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "batch: {{table}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Astra writes by table (req/s)",
+      "description": "Validate r_Cass = 3/7 for writes. cassandra_query filtered to verb!=select; cassandra_batch is always writes.",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
+      "id": 7,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (table)(rate(cassandra_query{cluster=\"$cluster\",verb=\"select\"}[$__rate_interval]))",
+          "legendFormat": "{{table}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Astra reads by table (req/s)",
+      "description": "Reads are not expected to differ much between SAW and SAA (similar caching, ~1 read on creation).",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
+      "id": 8,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (walType)(rate(wal_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "{{walType}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "WAL operation rate by type (ops/s)",
+      "description": "Covers both reads and writes (no separate write-only metric). Expect HISTORY_EVENT_WAL activity for SAW only; both use MUTABLE_STATE_WAL.",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
+      "id": 9,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum by (operation)(rate(visibility_persistence_requests{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Visibility persistence rate by operation (ops/s)",
+      "description": "OSS visibility_persistence_requests counter, tagged by operation (RecordWorkflowExecutionStarted, RecordWorkflowExecutionClosed, UpsertWorkflowExecution, DeleteWorkflowExecution).",
+      "type": "timeseries"
+    },
+    {
+      "datasource": { "type": "prometheus", "uid": "${datasource}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": { "legend": false, "tooltip": false, "viz": false },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "red", "value": 80 }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 32 },
+      "id": 10,
+      "options": {
+        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
+        "tooltip": { "mode": "single", "sort": "none" }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(rate(syncmatch_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "sync match",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${datasource}" },
+          "editorMode": "code",
+          "expr": "sum(rate(asyncmatch_latency_count{cluster=\"$cluster\"}[$__rate_interval]))",
+          "legendFormat": "async match",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Sync vs async match rate (matches/s)",
+      "description": "Health check on experimental conditions. Async match means tasks went through persistence/backlog rather than being dispatched directly to a waiting poller.",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 40,
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": { "text": "prod", "value": "prod" },
+        "name": "env",
+        "options": [
+          { "selected": true, "text": "prod", "value": "prod" },
+          { "selected": false, "text": "dev", "value": "test" }
+        ],
+        "query": "prod : prod, dev : test",
+        "type": "custom"
+      },
+      {
+        "current": { "text": "prod thanos", "value": "af7fe237-211e-413e-9723-41a73886bcbb" },
+        "hide": 2,
+        "includeAll": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "${env:text}.*",
+        "type": "datasource"
+      },
+      {
+        "current": {},
+        "datasource": { "type": "prometheus", "uid": "${datasource}" },
+        "definition": "label_values(restarts,cluster)",
+        "includeAll": false,
+        "label": "Cluster",
+        "name": "cluster",
+        "options": [],
+        "query": { "query": "label_values(restarts,cluster)", "refId": "StandardVariableQuery" },
+        "refresh": 2,
+        "regex": "",
+        "type": "query"
+      }
+    ]
+  },
+  "time": { "from": "now-3h", "to": "now" },
+  "timepicker": {},
+  "timezone": "utc",
+  "title": "SAA COGS",
+  "uid": "saacogs",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/.task/task.md b/.task/task.md
new file mode 100644
index 00000000..b9b47cc3
--- /dev/null
+++ b/.task/task.md
@@ -0,0 +1,764 @@
+For background context, please study the following documents carefully:
+
+START_DOCUMENT------------------------------------------------------------------------------
+# Temporal Activity Execution & saas-temporal Cloud Persistence: Implementation Overview
+
+## Part 1: Activity Execution Models in Temporal Server
+
+### 1.1 CHASM Standalone Activities (`chasm/lib/activity/`)
+
+CHASM standalone activities are first-class, independently-scheduled executions outside workflow context. They use **mutable state only** -- no history events.
+
+#### State Machine
+
+States defined in `chasm/lib/activity/proto/v1/activity_state.proto`:
+
+```
+UNSPECIFIED
+  → SCHEDULED
+    → STARTED
+      → COMPLETED (terminal)
+      → FAILED (terminal)
+      → CANCEL_REQUESTED → CANCELED (terminal)
+      → TIMED_OUT (terminal)
+      → TERMINATED (terminal)
+    → CANCEL_REQUESTED → CANCELED (terminal)
+    → TIMED_OUT (terminal)
+    → TERMINATED (terminal)
+    → SCHEDULED (retry path)
+```
+
+Lifecycle states (`activity.go:95-107`):
+- `LifecycleStateRunning`: SCHEDULED, STARTED, CANCEL_REQUESTED
+- `LifecycleStateCompleted`: COMPLETED
+- `LifecycleStateFailed`: FAILED, TERMINATED, TIMED_OUT, CANCELED
+
+#### State Transitions (`statemachine.go`)
+
+| Transition | From | To | Trigger |
+|---|---|---|---|
+| TransitionScheduled (37-77) | UNSPECIFIED | SCHEDULED | Initial scheduling |
+| TransitionRescheduled (87-127) | STARTED | SCHEDULED | Retry after failure |
+| TransitionStarted (130-169) | SCHEDULED | STARTED | Worker accepts task |
+| TransitionCompleted (177-202) | STARTED/CANCEL_REQUESTED | COMPLETED | Worker completes |
+| TransitionFailed (210-237) | STARTED/CANCEL_REQUESTED | FAILED | Non-retryable failure |
+| TransitionCancelRequested (278-295) | STARTED/SCHEDULED | CANCEL_REQUESTED | Cancel API called |
+| TransitionCanceled (304-331) | CANCEL_REQUESTED | CANCELED | Worker acknowledges cancel |
+| TransitionTerminated (246-275) | SCHEDULED/STARTED/CANCEL_REQUESTED | TERMINATED | Terminate API called |
+| TransitionTimedOut (340-374) | SCHEDULED/STARTED/CANCEL_REQUESTED | TIMED_OUT | Timer task fires |
+
+#### Mutable State Structures
+
+**ActivityState** (proto):
+- `activity_type`, `task_queue`, timeouts (`schedule_to_close`, `schedule_to_start`, `start_to_close`, `heartbeat`), `retry_policy`, `status`, `schedule_time`, `priority`, `cancel_state`, `terminate_state`
+
+**Activity Go Component** (`activity.go:52-68`):
+- `ActivityState` (embedded proto)
+- `Visibility: chasm.Field[*chasm.Visibility]` -- search attributes
+- `LastAttempt: chasm.Field[*ActivityAttemptState]` -- attempt count, stamp, started_time, failure details, worker identity
+- `LastHeartbeat: chasm.Field[*ActivityHeartbeatState]` -- heartbeat details and recorded_time
+- `RequestData: chasm.Field[*ActivityRequestData]` -- input, header, user_metadata
+- `Outcome: chasm.Field[*ActivityOutcome]` -- successful (output) or failed (failure)
+- `Store: chasm.ParentPtr[ActivityStore]` -- parent workflow (nil for standalone)
+
+#### Task Flow
+
+1. **Scheduling** (`handler.go:51-104`): `StartActivityExecution()` → creates Activity → applies TransitionScheduled
+2. **Dispatch** (`activity_tasks.go:21-79`): `activityDispatchTaskExecutor` pushes to matching service via `AddActivityTask()`
+3. **Start** (`activity.go:173-191`): `HandleStarted()` applies TransitionStarted, schedules start-to-close and heartbeat timeout tasks
+4. **Completion** (`activity.go:259-280`): `HandleCompleted()` applies TransitionCompleted
+5. **Failure** (`activity.go:284-323`): `HandleFailed()` checks retryability → either `tryReschedule()` or TransitionFailed
+6. **Heartbeat** (`activity.go:559-586`): Updates LastHeartbeat, reschedules heartbeat timeout task
+
+#### Timeout Tasks
+
+- **ScheduleToStartTimeoutTask** (`activity_tasks.go:81-116`): Non-retryable → TIMED_OUT
+- **ScheduleToCloseTimeoutTask** (`activity_tasks.go:118-150`): Non-retryable → TIMED_OUT
+- **StartToCloseTimeoutTask** (`activity_tasks.go:152-198`): Attempts retry via `tryReschedule()`; if not retryable → TIMED_OUT
+- **HeartbeatTimeoutTask** (`activity_tasks.go:200-276`): Validates heartbeat recency; attempts retry; if not retryable → TIMED_OUT
+
+#### Retry Logic
+
+- `shouldRetry()` (`activity.go:504-514`): Checks TransitionRescheduled possible, attempt < max, enough time remaining
+- `hasEnoughTimeForRetry()` (`activity.go:518-534`): Exponential backoff calculation against schedule-to-close deadline
+- `tryReschedule()` (`activity.go:489-502`): Applies TransitionRescheduled (increments attempt, schedules dispatch with backoff)
+
+#### Cancellation
+
+- `RequestCancelActivityExecution` (`handler.go:273-296`): Applies TransitionCancelRequested
+  - If SCHEDULED: immediately applies TransitionCanceled (`activity.go:414-433`)
+  - If STARTED: stays CANCEL_REQUESTED; worker receives cancellation on next interaction
+
+---
+
+### 1.2 Legacy Workflow Activities
+
+Activities executed as part of a workflow use **mutable state (ActivityInfo) plus history events**.
+
+#### History Events
+
+```
+EVENT_TYPE_ACTIVITY_TASK_SCHEDULED (10)
+EVENT_TYPE_ACTIVITY_TASK_STARTED (11)
+EVENT_TYPE_ACTIVITY_TASK_COMPLETED (12)
+EVENT_TYPE_ACTIVITY_TASK_FAILED (13)
+EVENT_TYPE_ACTIVITY_TASK_TIMED_OUT (14)
+EVENT_TYPE_ACTIVITY_TASK_CANCEL_REQUESTED (15)
+EVENT_TYPE_ACTIVITY_TASK_CANCELED (16)
+```
+
+#### ActivityInfo Mutable State (`persistence/v1/executions.proto:524-661`)
+
+Core: `activity_id`, `activity_type`, `task_queue`, `scheduled_time`, `started_time`, `started_event_id`, `scheduled_event_id`
+
+Timeouts: `schedule_to_close_timeout`, `schedule_to_start_timeout`, `start_to_close_timeout`, `heartbeat_timeout`
+
+Retry: `attempt`, `has_retry_policy`, `retry_initial_interval`, `retry_maximum_interval`, `retry_maximum_attempts`, `retry_backoff_coefficient`, `retry_expiration_time`, `retry_non_retryable_error_types`, `retry_last_failure`
+
+State flags: `cancel_requested`, `cancel_request_id`, `timer_task_status` (bit flags), `stamp`, `paused`, `pause_info`
+
+#### Pending Activity States (`activity.go:53-61`)
+
+- SCHEDULED: `StartedEventId == 0`
+- STARTED: `StartedEventId != 0 && !CancelRequested`
+- CANCEL_REQUESTED: `CancelRequested`
+- PAUSED: `Paused && Scheduled`
+- PAUSE_REQUESTED: `Paused && Started`
+
+#### Timer Task Status Flags
+
+```go
+TimerTaskStatusCreatedScheduleToStart = 1
+TimerTaskStatusCreatedScheduleToClose = 2
+TimerTaskStatusCreatedStartToClose    = 4
+TimerTaskStatusCreatedHeartbeat       = 8
+```
+
+#### Pause/Unpause/Reset (unique to legacy model)
+
+- **Pause** (`activity.go:254-284`): Sets `paused = true`, increments stamp if SCHEDULED
+- **Unpause** (`activity.go:388-425`): Clears pause, regenerates retry task if SCHEDULED
+- **Reset** (`activity.go:286-379`): Resets attempt to 1, optionally resets heartbeat/options
+
+#### API Handlers (`service/history/api/`)
+
+- `recordactivitytaskstarted/api.go`: Creates ActivityTaskStartedEvent
+- `respondactivitytaskcompleted/api.go`: Creates ActivityTaskCompletedEvent
+- `respondactivitytaskfailed/api.go`: Retry or ActivityTaskFailedEvent
+- `respondactivitytaskcanceled/api.go`: Creates ActivityTaskCanceledEvent
+- `recordactivitytaskheartbeat/api.go`: Updates heartbeat state, reschedules timeout
+
+---
+
+### 1.3 Activity Metrics (Both Models)
+
+Defined in `common/metrics/metric_defs.go`. Both models emit the same metric names.
+
+**Counters:**
+| Metric | Description |
+|---|---|
+| `activity_success` | Successful completions (excludes retries) |
+| `activity_fail` | Final failures (retries exhausted) |
+| `activity_task_fail` | Per-attempt failures (includes retries) |
+| `activity_cancel` | Canceled activities |
+| `activity_terminate` | Terminated activities (CHASM only) |
+| `activity_timeout` | Terminal timeouts |
+| `activity_task_timeout` | Per-timeout events (includes retries) |
+
+**Timers:**
+| Metric | Description |
+|---|---|
+| `activity_start_to_close_latency` | StartedTime → completion/failure/timeout |
+| `activity_schedule_to_close_latency` | ScheduleTime → completion/failure/timeout/cancel |
+
+**Tags:** `namespace`, `task_queue_family`, `operation`, `activity_type`, `versioning_behavior`, `workflow_type` (set to `__temporal_standalone_activity__` for CHASM). Timeout metrics additionally tagged with `timeout_type` (SCHEDULE_TO_START, SCHEDULE_TO_CLOSE, START_TO_CLOSE, HEARTBEAT).
+
+**Metric enrichment** (`activity.go:804-824`): `enrichMetricsHandler()` adds per-task-queue-family scoping via `metrics.GetPerTaskQueueFamilyScope()`.
+
+---
+
+### 1.4 Key Differences
+
+| Aspect | CHASM Standalone | Legacy Workflow |
+|---|---|---|
+| Persistence | Mutable state only | Mutable state + history events |
+| Parent context | Standalone execution | Part of workflow execution |
+| State tracking | ActivityState + sub-components | ActivityInfo in workflow |
+| Task dispatch | Direct to matching service | Via workflow task completion |
+| Completion storage | Outcome field | History events |
+| Cancellation | Explicit CANCEL_REQUESTED state | Boolean flag in ActivityInfo |
+| Pause support | Not yet implemented | Full (pause, unpause, reset) |
+| Search attributes | Visibility component (chasm) | Workflow search attributes |
+
+---
+
+## Part 2: saas-temporal Cloud Integration
+
+### 2.1 Architecture Overview
+
+saas-temporal wraps the Temporal server to run in Temporal Cloud cells by replacing core persistence with Cloud Data Storage (CDS), backed by:
+- **Datastax Astra Cassandra** for durable storage
+- **Write-Ahead Logs (WALs)** for durability before Cassandra persistence
+- **OpenSearch/Elasticsearch** for workflow visibility
+- **Tiered Storage** (S3/GCS/Azure) for history archival
+
+### 2.2 Entry Point and Server Construction
+
+**Main:** `cmd/temporal-service/main.go`
+
+The `start` command:
+1. Loads OSS Temporal configuration from YAML
+2. Injects secrets (Astra, Elasticsearch credentials)
+3. Sets up dynamic configuration
+4. Optionally enables cloud metrics handler (Chronicle)
+5. Configures authorization (SaaS Auth0 JWT + Temporal JWT)
+6. Configures custom datastore with CDS
+7. Creates server via `cds.NewServer()`
+
+**Server creation:** `cds/export/cds/server.go`:
+```go
+func NewServer(serviceFxOpts FxOptions, opts ...temporal.ServerOption) (temporal.Server, error) {
+    return newServerFx(TopLevelModule, serviceFxOpts, opts...)
+}
+```
+
+Uses Uber FX dependency injection with modules for persistence factory, dynamic config, serialization, and per-service modules (history, matching, frontend, worker).
+
+### 2.3 CDS Factory Architecture (`cds/export/cds/factory.go`)
+
+**FactoryProvider** (lines 51-65): Implements `client.AbstractDataStoreFactory`
+- `NumberOfShards`, `OrderedDatastoreConfigs` (shards → datastores)
+- `HistoryDatastoreConfigs` (weighted distribution)
+- `WALFollowerProviders` for WAL followers
+- `Clock`, `DynamicConfig`, `ChasmRegistry`
+
+**Factory**: Manages three WAL pools:
+- **MS WAL** (MutableState): Records mutable state mutations
+- **HE WAL** (HistoryEvent): Records history events
+- **LP WAL** (LargePayload): Records oversized payloads
+
+Plus store providers: `MultiDBStoreProvider` for ordinal datastores, separate history store provider with tiered storage, optional Walker integration.
+
+### 2.4 Astra Cassandra Integration (`cds/storage/cassandra/astra/`)
+
+**Session creation** (`gocql.go`): Wraps gocql with Astra-specific config (TLS, connection pooling, retry policies) via Datastax `gocql-astra`.
+
+**Query instrumentation** (`gocql_metrics.go:48-100`): `queryMetricsObserver` instruments every query with 150-entry LRU statement cache.
+
+**Cassandra Metrics:**
+| Metric | Description |
+|---|---|
+| `CassandraConns` | Connection count |
+| `CassandraQueryTotalLatency` | Query latency |
+| `CassandraBatchTotalLatency` | Batch latency |
+| `CassandraQuery` | Query count |
+| `CassandraBytesTx` / `CassandraBytesRx` | Network bytes |
+| `CassandraLargeResponse` / `CassandraLargeRequest` | Large payload detection |
+| `CassandraRetries` | Retry histogram |
+| `CassandraErrors` | Error counters |
+
+Tags: `OperationType` (INSERT/UPDATE/DELETE/SELECT), `TableName`, `CasTag` (CAS operation)
+
+### 2.5 Write-Ahead Logs (`cds/export/wal/`, `cds/stream/`)
+
+WALs provide durability guarantees before data reaches Cassandra.
+
+**WAL Client Interface** (`cds/export/wal/crud.go`):
+```go
+WriteMS(), WriteHE(), WriteLP()  // Write operations per pool
+ReadMS(), ReadHE(), ReadLP()     // Read operations per pool
+```
+
+**Configuration** (`cds/config/configs.go:46-140`):
+- Rate limiting: `WALReadsRate`, `WALReadsBurst`
+- Timeouts: `WALDialTimeout`, `WALReadTimeout`, `WALWriteTimeout`
+- Ledger rotation: `WALLedgerRotationBytesThreshold`, `WALLedgerRotationAgeThreshold`
+- Retention: `WALLedgerLifetime`
+- Parallelism: `WALMaxParallelReads`
+- Feature flags: `WALReadV2Enabled`, `WALV2EncodingEnabled`
+
+**WAL Metrics** (`cds/metrics/metrics.go:34-56`):
+| Metric | Description |
+|---|---|
+| `wal_latency` | Operation latency |
+| `wal_stream_dial_attempt/success/error` | Connection establishment |
+| `wal_stream_dns_latency` | DNS resolution |
+| `wal_stream_connect_latency` | TCP connect |
+| `wal_stream_handshake_latency` | TLS handshake |
+| `wal_stream_send/receive_latency` | I/O latency |
+| `wal_health_check_failed_count` | Connection health |
+| `wal_write_timeout_count` | Timeout tracking |
+| `wal_reader_page_latency` | Page read latency |
+| `wal_entries_per_read` | Batch size histogram |
+| `wal_compression_count` | Compression events |
+
+**Flush Metrics** (lines 13-27):
+| Metric | Description |
+|---|---|
+| `flush_latency` | Time to flush to persistence |
+| `flush_error` | Flush failures |
+| `flush_snapshot_aborts` | Snapshot abort count |
+| `flush_persistence_behindness_bytes/count/time` | Persistence lag |
+| `flush_time_since_last_persist` | Staleness |
+| `flush_reason_count` | Flush trigger reasons (by namespace) |
+
+**Recovery Metrics** (lines 57-70):
+| Metric | Description |
+|---|---|
+| `recovery_total_latency` | Full recovery duration |
+| `recovery_open_reader_latency` | Snapshot reader open |
+| `recovery_rate_limiter_latency` | Rate limiting delay |
+| `recovery_first_read_latency/bytes` | Initial WAL read |
+| `recovery_takeover_latency` | Takeover phase |
+| `recovery_wal_update_latency` | WAL update during recovery |
+
+**Ledger Metrics** (lines 77-82):
+| Metric | Description |
+|---|---|
+| `ledger_rotation_count` | Rotations |
+| `logs_per_ledger` | Logs per ledger histogram |
+| `segments_per_shard` | Segments per shard histogram |
+| `segment_too_old_count` | GC candidates |
+| `active_segment_too_old_count` | Rotation delay |
+
+### 2.6 Execution Store Wrapper (`cds/export/cds/execution_store.go`)
+
+Wraps the Cassandra execution store to:
+- Convert mutable state mutations to WAL records (`NewMSWALRecord()`)
+- Convert history events to WAL records (`NewHEWALRecord()`)
+- Calculate storage metering
+- Manage snapshot trimming
+- Implement history event caching
+
+Implements `persistence.ExecutionStore` and `persistence.ShardStore`.
+
+### 2.7 How Activity State Flows Through CDS
+
+**CHASM activities**: Activity mutable state → MS WAL write → Cassandra persistence. No HE WAL involvement (no history events). State transitions are persisted as mutable state mutations via the execution store wrapper.
+
+**Legacy workflow activities**: ActivityInfo mutable state → MS WAL write → Cassandra. History events (Scheduled, Started, Completed, etc.) → HE WAL write → Cassandra. Both paths go through the execution store wrapper's WAL record conversion.
+
+### 2.8 OpenSearch/Elasticsearch Visibility (`visibility/`)
+
+**Factory:** `visibility/factory.go` -- `VisibilityStoreFactory` creates visibility stores configured per cloud cell.
+
+**Batch processor metrics** (`visibility/common/metrics_defs.go`):
+| Metric | Description |
+|---|---|
+| `visibility_batch_processor_request_add_latency` | Enqueue time |
+| `visibility_batch_processor_request_latency` | Total request latency |
+| `visibility_batch_processor_request_errors` | Failed requests |
+| `visibility_batch_processor_commit_latency` | Batch commit time |
+| `visibility_batch_processor_batch_size` | Items per batch histogram |
+| `visibility_batch_processor_batch_requests` | Requests per batch histogram |
+| `visibility_batch_processor_queued_requests` | Queue depth histogram |
+| `visibility_batch_processor_corrupted_data` | Data integrity failures |
+| `visibility_batch_processor_duplicate_request` | Deduplication events |
+
+### 2.9 Tiered Storage (`cds/persistence/tieredstorage/`)
+
+Long-term history archival to cloud object stores:
+- S3 (AWS): `s3_store.go`
+- GCS (Google Cloud): `gcs_store.go`
+- Azure Blob: `azure_client.go`
+
+Interface: `Upload()`, `Read()`, `Delete()`, `List()`, `PluginName()`
+
+Metrics: `ReadWorkflowHistory`, `UploadWorkflowHistory`, `DeleteWorkflowHistory`, `ListTieredStorageObjects`
+
+### 2.10 Persistence Store Metrics (`cds/persistence/metrics/defs.go`)
+
+**Store layer** (lines 70-85):
+| Metric | Description |
+|---|---|
+| `store_requests` | Request count by operation |
+| `store_latency` | Operation latency |
+| `store_errors` | Errors: shard_exists, shard_ownership_lost, condition_failed, timeout, unavailable |
+
+**Manager layer** (lines 89-102):
+| Metric | Description |
+|---|---|
+| `saas_persistence_requests` | High-level request count |
+| `saas_persistence_latency` | High-level latency |
+| `saas_persistence_errors` | Error tracking |
+
+Tags: `operation` (CreateShard, UpdateShard, GetWorkflowExecution, etc.), `component`, `cass_cluster`
+
+### 2.11 Cloud Metrics Infrastructure
+
+**Handler chain** (`cloudmetricshandler/delegating_recorders.go`):
+1. `allowlistedRecorder`: Filters through allowlist
+2. `multiRecorder`: Sends to multiple backends
+
+**Chronicle integration** (`cloudmetricshandler/chronicle_recorder.go`):
+- Enabled by `TEMPORAL_ENABLE_CLOUDMETRICSHANDLER`
+- Config: `/etc/temporal/cloudmetricshandler`
+- Kubernetes enrichment: pod name, namespace, labels
+- Backends: S3 writer, HTTP writer (to Chronicle service)
+- Batch config: 50K queue, 25K batch, 100ms flush
+
+**Action metering** (`actionmetering/metrics.go`):
+- `billable_action_count` with tags: namespace, action_type, workflow_type, workflow_task_queue
+- Activity type/task queue currently placeholder `"_unknown_"` with TODOs for standalone activity support
+
+### 2.12 Additional Cloud Features
+
+- **Authorization**: SaaS Auth0 JWT + Temporal JWT, TLS client certs
+- **Quotas/Flow Control** (`quotas/`, `flowcontrol/`): Request-level and task-queue quotas
+- **Multi-region replication** (`cds/service/history/replication/`): Custom replication filters
+- **Metering V3**: S3/GCS/Azure bucket metering
+- **SMS (etcd)**: Secondary Metadata Store for namespace/cluster metadata
+- **Dynamic config**: 150+ hot-reloadable properties (`cds/config/configs.go`)
+END_DOCUMENT--------------------------------------------------------------------------------------
+
+START_DOCUMENT------------------------------------------------------------------------------
+# Standalone Activity COGS and margins
+
+@Dan Davison March 17, 2026
+
+We want to ensure that we are billing in a way that meets our target margins for new product features in cloud, such as new CHASM execution types. To do this, we need to know certain things about COGS (cost of goods sold) for these features. This document outlines how to estimate COGS for Standalone Activity relative to Workflow and the implications of this for margins.
+
+# Motivation: avoiding cannibalization
+
+We have rules (see [temporalio/action](https://github.com/temporalio/action)) specifying how customer operations map to billable Actions. For example, suppose a customer executes a Workflow that executes a single Activity, which succeeds on first attempt without heartbeating. This incurs 2 Actions (StartWorkflow and ScheduleActivity). We’ll call this a “Single Activity Workflow” (SAW).
+
+We haven’t yet decided how we will bill for Standalone Activity (SAA). But suppose that we decide that executing a single SAA (no retries, no heartbeating) is 1 Action (StartStandaloneActivity).
+
+If we want SAA margins to match SAW margins, then we want the COGS of SAA (no retries, no heartbeating) to be ≤ 1/2 that of SAW (because we get half as much revenue for the SAA). If it is not, then there would be some degree of cannibalization (customers switch their single-activity workloads to SAA, but our margins there are worse). We’d hope it would be offset by increased volume, but we’d still prefer SAA margins to match SAW.
+
+### What about retries and heartbeating?
+
+SAW (no retries and no heartbeating) is 2 Actions. If the activity retries once it becomes 3 Actions (ScheduleActivity now happens twice); if it heartbeats once during each attempt it becomes 5 Actions.
+
+Let’s assume (as we currently intend) that we apply the same billing rules to Standalone Activity retries and heartbeating. Then, as long as SAA is not worse than Workflow Activity with respect to COGS of retries and heartbeating, our margins from those customer operations will be at least as good under SAA as when they are done in the context of a pre-CHASM workflow. CHASM has been designed for efficiency; we have reason to be optimistic that it’s not *worse* than the legacy workflow activity implementation.
+
+# Problem statement
+
+The above suggests that we should focus on estimating the ratio of COGS for Standalone Activity (SAA) relative to Single-activity Workflow (SAW) in the no retries, no heartbeating case:
+
+$$
+R = \frac{C_{SAA}}{C_{SAW}}.
+$$
+
+We expect $R < 1$ because SAA achieves execution of an activity with fewer RPCs, persistence operations, etc., than SAW. We are hoping that it is less than 1/2, since then our SAA margins are as good as or better than our workflow margins, assuming we bill 1 Action for SAA.
+
+# Estimating the COGS ratio
+
+We’ll assume that the COGS for a SAA or SAW execution results solely from invoices from third parties relating to cloud compute resources. COGS for an execution type (SAA or SAW) is the sum of price ($p$) times quantity consumed ($q$) over all resources:
+
+$$
+C = \sum_{i} p_i q_i.
+$$
+
+We want the COGS ratio $R$. We can write that as a weighted average of per-resource usage ratios:
+
+$$
+R = \frac{C_{SAA}}{C_{SAW}} = \sum_i f_i r_i.
+$$
+
+This allows us to calculate $R$ as a function of two things that we can estimate:
+
+- $f_i = p_i q_{i}(SAW) / \sum_j p_j q_{j}(SAW)$ is the fraction of SAW COGS attributable to resource $i$ (“spend share”). We’ll use our current cloud spend for this.
+- $r_i = q_i(SAA) / q_i(SAW)$ is the per-resource usage ratio. We will estimate these by comparing the implementations or by running experiments in cloud cells.
+
+The resources ($i$) potentially include:
+1. Data egress
+2. CPU usage
+3. Memory usage
+4. Persistence operations against our WALs
+5. Persistence operations against Astra (to be replaced by Walker)
+6. Persistence operations against OpenSearch (visibility)
+7. Metrics/logs processing and storage costs, Clickhouse
+
+*At-rest data storage is excluded: we bill customers separately for storage on a GB/h basis, so it does not need to be subsidized by Actions. (Tangentially, it’s worth noting that we expect SAA storage to cost users half what they’d pay for SAW since SAW stores the input and output payloads in both workflow scheduled/complete events and activity scheduled/complete events.)*
+
+# Per-resource usage ratios
+
+To proceed, we need to estimate the SAW vs SAA usage ratio ($r_i$) for each resource.
+
+The following table summarizes the two implementations. It describes the simplest possible happy-path scenario: an activity that succeeds on first attempt without heartbeating, via sync matches.
+
+| # | Single-activity Workflow | Standalone Activity |
+| --- | --- | --- |
+| 1 | RPC: `StartWorkflowExecution` => HEWAL, MSWAL; Vis&; Cassandra& | RPC: `StartActivityExecution` => MSWAL; Vis&; Cassandra& |
+| 2 | Task => RPC: `AddWorkflowTask` |  |
+| 3 | RPC: `RecordWorkflowTaskStarted` => HEWAL, MSWAL; Cassandra& |  |
+| 4 | RPC: `RespondWorkflowTaskCompleted` => HEWAL, MSWAL; Cassandra& |  |
+| 5 | Task => RPC: `AddActivityTask` | Task => RPC: `AddActivityTask` |
+| 6 | RPC: `RecordActivityTaskStarted` => HEWAL, MSWAL; Cassandra& | RPC: `RecordActivityTaskStarted` => MSWAL; Cassandra& |
+| 7 | RPC: `RespondActivityTaskCompleted` => HEWAL, MSWAL; Cassandra& | RPC: `RespondActivityTaskCompleted` => MSWAL; Vis&; Cassandra& |
+| 8 | Task => RPC: `AddWorkflowTask` |  |
+| 9 | RPC: `RecordWorkflowTaskStarted` => HEWAL, MSWAL; Cassandra& |  |
+| 10 | RPC: `RespondWorkflowTaskCompleted` => HEWAL, MSWAL; Vis&; Cassandra& |  |
+- `&` indicates a write that’s not on the sync response path
+- `AddWorkflowTask` and `AddActivityTask` involve inter-service RPCs but no persistence writes in the happy path (“sync match”).
+- The table does not show worker poll requests
+- An additional `Vis&` is incurred in both cases when the execution is deleted.
+
+Comparing the implementations in the table gives
+
+$$
+r_{\text{WAL}} = \frac{3}{14} = 0.21,~~~~
+r_{\text{Cass}} = \frac{3}{7} = 0.43,~~~~
+r_{\text{Vis}} = \frac{3}{3} = 1.0.~~~~
+$$
+
+These ratios count writes only. Cassandra reads are not expected to differ much between SAW and SAA, since both use similar caching mechanics, with the result that a high proportion of both SAW and SAA executions incur ~1 read (on execution creation).
+
+In addition, we can estimate data transfer costs by comparing the implementations. These are likely dominated by egress to customer infra (ingress is free on AWS and GCP; data transfers to Astra, OpenSearch, and Grafana are in-VPC or via PrivateLink). Let the activity input and output payload sizes be $S_I$ and $S_O$. Payload egress for SAW is $2S_I + 2S_O$ (input payload sent to workflow and activity workers; output payload sent to workflow worker and client). For SAA this is $S_I + S_O$ since there is no workflow worker detour. This gives
+
+$$
+r_\text{data\_transfer} = 0.5.
+$$
+
+# COGS ratio estimate
+
+Using approximate/preliminary cloud spend share numbers (thanks @Stephen Chan ) we have:
+
+| **Resource** | **Spend share $f_i$ (preliminary)** | **Usage ratio $r_i$** | **Notes** |
+| --- | --- | --- | --- |
+| **Astra writes** | 40% | $\frac{3}{7}$ = 0.43 | SAW does 2 additional writes for each WFT |
+| **Visibility** (OpenSearch) | 20% | $\frac{3}{3}$ = 1.00 | Equal — both SAA and SAW produce exactly ~~2~~ 3 visibility updates |
+| **WAL writes** | 10% | $\frac{3}{14}$ = 0.21 | Half of Astra ratio: SAA writes only to MSWAL, whereas SAW writes to both HEWAL and MSWAL |
+| **EC2 compute** | 10% | ? | Would need cloud cell experiment |
+| **Data transfer** | 10% | $\frac{1}{2}$ = 0.50 | SAW sends payloads via workflow worker round-trip; SAA does not |
+| **Overheads** (incl. Clickhouse) | 10% | ? |  |
+
+This gives the following estimate of the COGS ratio:
+
+$$
+\begin{align*}
+R &=
+\underbrace{0.4 \times 0.43}_{\text{Astra}:~0.17} +
+\underbrace{0.2 \times 1.0}_{\text{Vis}:~0.20} +
+\underbrace{0.1 \times 0.21}_{\text{WAL}:~0.02} +
+\underbrace{0.1 \times 0.50}_{\text{Tx}:~0.05} +
+0.1 \cdot r_\text{compute} + 0.1 \cdot r_\text{overhead} \\\\
+&=
+0.44 + 0.1(r_\text{compute} + r_\text{overhead}).
+\end{align*}
+$$
+
+# Sensitivity analysis
+
+Before thinking about the implications of this for billing and margins, the next steps are:
+
+1. Refine the cloud spend estimates (Cloud Capacity team; does not involve load experiments)
+2. Decide whether we want to do load experiments to estimate $r_\text{compute}$
+3. Decide how we will address $r_\text{overhead}$
+
+For (2) and (3) we can do some initial sensitivity analysis:
+
+SAW does 10 RPCs vs SAA’s 4 (with 7 vs 3 of them doing persistence writes in the sync-match case). If services are CPU-bound then this suggests that $0.4 < r_\text{compute} < 1.0$ might be reasonable.
+
+The other overheads include (per @Stephen Chan ) Clickhouse, observability cells, and Envoy proxies. Since these costs should also scale with RPC count, let’s assume the same bounds: $0.4 < r_\text{overhead} < 1.0$. This gives:
+
+$$
+0.52 \leq R \leq 0.64.
+$$
+
+![image.png](.task/sensitivity.png)
+
+For example, if SAW margins were 70%, SAA margins would be 62% - 69%. This margin reduction would affect at maximum the ~3% of workflows that are SAW.
+
+- COGS ratio to margins conversion formula
+
+     $\text{margin}_{\text{SAA}} = 1 - 2R(1 - \text{margin}_{\text{SAW}})$.
+
+
+# Discussion
+
+- **Visibility limits SAA margins**. Visibility is expensive (20%), but SAA and SAW perform the same number of visibility writes, so it combines a large weight with the worst possible ratio.
+- **(Unfavorable) Over-provisioning would push $R$ up.** The usage ratios above for persistence are derived from write counts, which only translate to cost savings if capacity tracks usage. But e.g. Astra is bought in fixed hardware units (“Astra Classic”). If any resource component is over-provisioned then SAA and SAW would pay the same cost per execution and $r_i \to 1.0$, making SAA margins less attractive relative to workflow.
+- **Cloud spend share**. We could attempt to separate fixed costs and renormalize (see [Next steps](https://www.notion.so/Next-steps-3268fc567738805e82ddd9c1e1d4c9d1?pvs=21)). This would be favorable to SAA margins if it decreases the visibility share, but unfavorable if it decreases Astra share.
+
+    We’re estimating $f_i$ from cloud spend, so we’re assuming that the spend distribution for single-activity workflows would be similar to the spend distribution for the real mix of customer workflows. I suspect this is a reasonable modeling assumption since in both cases the application is performing the same state transitions in response to workflow and activity task processing.
+
+- **(Mixed) Effect of migration to Walker**. Walker replaces Astra with storage that is under our own control, making right-sizing easier. This may mean that the 3/7 write ratio is more fully realized under Walker, moving SAA COGS away from SAW. However, Walker will be cheaper than Astra, so persistence’s share of spend shrinks. Since persistence is where SAA has its largest advantage, this would bring SAA COGS closer to SAW.
+
+    These two effects act in opposite directions and the net result will depend on their relative magnitudes. This suggests that we should monitor COGS calculations as the Walker migration proceeds.
+
+- **(Future) A visibility backend migration would improve SAA margins.** There has been [movement](https://www.notion.so/Visibility-CDS-2a98fc567738807e9ee0f318edc4c16f?pvs=21) toward replacing OpenSearch. As discussed above, any reduction in visibility spend share would make SAA COGS more attractive relative to workflow.
+
+# Conclusion
+
+- [We are planning to bill SAA at 1/2 the price of SAW](https://www.notion.so/PRD-Standalone-Activities-for-durable-job-processing-1ee8fc567738806d8b6fe8e2eeae0fc4?pvs=21). Although there are various assumptions involved, at this point it looks like SAA COGS will be more than 1/2 SAW COGS: the estimated range above is $0.52 \leq R \leq 0.64$. This implies that some degree of cannibalization is likely. The extent of cannibalization would be bounded by the proportion of current workloads that are SAW, which is 3% per @Phil Prasek. It may be offset by volume growth attributable to SAA.
+
+# Next steps
+
+- **Refine cloud spend share estimates.**
+
+    The cloud spend share weights used in this analysis are supposed to be marginal costs. We could attempt to separate marginal vs fixed costs and renormalize our spend share percentages. This would be favorable to SAA margins if it decreases the visibility share, but unfavorable if it decreases Astra share.
+
+- **Investigate any impact of over-provisioning.**
+
+    SAA margins may be less favorable than the calculations suggest if some resources are over-provisioned. See discussion [above](https://www.notion.so/Standalone-Activity-COGS-and-margins-3268fc567738803cb63fd9397ffd351c?pvs=21).
+
+- **Decide whether to do cloud cell experiments**.
+
+    Unlike the other resource categories, we lack any obvious theoretical basis for estimating  $r_\text{compute}$ and $r_\text{overhead}$. Estimating $r_\text{compute}$ via cloud cell experiments would require perhaps one engineer-week.  If this were to show a value close to 0.4 then it would suggest that the upper bound on $R$ is 0.56, as opposed to the current 0.64. This would however still be subject to all the assumptions discussed above. We could also attempt to tighten our estimated bounds on $r_\text{overhead}$ via experiment.
+
+    If we decide to do this, the $r_\text{compute}$ experiment would be something like the following: choose a reference activity (e.g. sleeps for 10s, no heartbeating, never fails) and run SAA and SAW workloads on a cloud cell at a fixed start rate (e.g. 10/s) for a sustained period (e.g. 1hr). Fixing start rate rather than concurrency naturally controls for end-to-end latency differences between SAA and SAW.  $r_\text{cpu}$ and $r_\text{memory}$ can then be estimated from metrics as the ratio of mean utilization above the idle baseline. The analysis will need to decide how to combine them, e.g. based on which is more often limiting; alternatively, using the larger of the two would yield a conservative calculation.
+END_DOCUMENT------------------------------------------------------------------------------
+
+START_DOCUMENT------------------------------------------------------------------------------
+# Test plan for SAA COGS measurement
+
+@Dan Davison March 19, 2026
+
+The [SAA COGS proposal](.task/saa-cogs.md) made an initial estimate of the SAA/SAW COGS ratio based on estimating persistence, visibility, and data transfer usage ratios directly from the implementation. But for compute and overheads we have no analytical estimate. We plan to run an experiment to:
+
+1. Estimate the missing $r_\text{compute}$.
+2. Validate the analytical $r_i$ against observed metrics.
+
+For comparison, the Fairness COGS experiment docs:
+
+- [Test plan](https://www.notion.so/temporalio/Test-plan-for-COGS-measurement-28c8fc56773880169cdcc4087a98ceaf)
+- [Fairness COGS Impact](https://www.notion.so/temporalio/Fairness-COGS-Impact-2c58fc567738808f806cfbf09b771b2c)
+- [Pricing Council doc](https://www.notion.so/temporalio/WIP-Pricing-Council-Fairness-COGS-Impact-2cc8fc56773880dcb3efe435623edd9a)
+
+
+
+
+# Proposed SAA experiment
+
+
+## Workloads
+
+Two workloads, run sequentially on the same cell:
+
+1. **SAW**: execute workflow with one activity (no heartbeat, no retry).
+2. **SAA**: execute standalone activity (no heartbeat, no retry).
+
+## Parameters
+
+**Start rate.** I think that we should fix start rate rather than concurrency, since this naturally controls for end-to-end latency differences between SAA and SAW (i.e. a cell running SAW will see higher load because the concurrency will be higher because the SAW end-to-end latency is higher). The fairness experiment used 4k tasks/s. Is starting 4k executions/s reasonable for us?
+
+**Activity.** Immediate successful return; no heartbeat, no retry. We could compare with a 1s sleep to see if results differ?
+
+**Sync match.** Do one run such that sync match should be 100%, and another tuned such that sync match is lower? Verify sync match from metrics (`syncmatch_latency`, `asyncmatch_latency`)
+
+**Duration and repetitions.** Steady-state load; runs need to be long enough for stable CPU averages. The
+fairness experiment used 6h per scenario, but this may have been because of their more sophisticated
+sinusoidal load design. Is 1h more than enough for the SAA experiment? ≥2 runs per workload to check
+variance/reproducibility.
+
+## Infrastructure
+
+- Anything special about test cell sizing?
+- Workers should run outside the cell (how did fairness experiment do this?)
+
+## Metrics
+
+Initial dashboard content https://grafana.tmprl-internal.cloud/d/saacogs/saa-cogs:
+
+
+- **CPU per service** (frontend, history, matching). `node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate` — a k8s recording rule over cAdvisor container metrics (defined in saas-components prometheus rules).
+- **Memory per service**. `container_memory_working_set_bytes` — also k8s/cAdvisor (defined in saas-components alert rules).
+- **RPC rate by method**, one panel per service (frontend, history, matching). `service_requests` counter ([temporal:common/metrics/metric_defs.go:615](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)), tagged with `operation` (the RPC method name). Recorded by a gRPC server-side interceptor ([telemetry.go:177](https://github.com/temporalio/temporal/blob/main/common/rpc/interceptor/telemetry.go)), so it captures inter-service RPCs (e.g. history→matching `AddActivityTask`).
+- **Astra writes by table**. `cassandra_query` counter with `verb!="select"`, plus `cassandra_batch` counter, both broken down by `table`. Tags include `operation`, `table`, `verb`, `cas` ([saas-temporal:cds/metrics/metrics.go:233,238](https://github.com/temporalio/saas-temporal/blob/main/cds/metrics/metrics.go)).
+- **Astra reads by table**. `cassandra_query` with `verb="select"`, broken down by `table`.
+- **WAL operation rate by type**. `wal_latency_count` ([saas-temporal:cds/metrics/metrics.go:35](https://github.com/temporalio/saas-temporal/blob/main/cds/metrics/metrics.go)) broken down by `walType` label (values: `MUTABLE_STATE_WAL`, `HISTORY_EVENT_WAL`, `LARGE_PAYLOAD_WAL` — see [saas-temporal:cds/common/tag/tag.go:11-24](https://github.com/temporalio/saas-temporal/blob/main/cds/common/tag/tag.go)). Note: this metric covers both reads and writes; there is no separate write-only WAL metric. This is arguably more relevant to COGS since WAL reads also cost something.
+- **Visibility persistence rate by operation**. `visibility_persistence_requests` counter ([temporal:common/metrics/metric_defs.go:1398](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)), tagged with `operation` (values include `RecordWorkflowExecutionStarted`, `RecordWorkflowExecutionClosed`, `UpsertWorkflowExecution`, `DeleteWorkflowExecution` — see [visiblity_manager_metrics.go](https://github.com/temporalio/temporal/blob/main/common/persistence/visibility/visiblity_manager_metrics.go)).
+- **Sync vs async match rate**. `syncmatch_latency_count` and `asyncmatch_latency_count` ([temporal:common/metrics/metric_defs.go:1119-1120](https://github.com/temporalio/temporal/blob/main/common/metrics/metric_defs.go)).
+
+
+## Load generator (omes)
+
+- Add a new scenario that starts standalone activities directly from the load generator, not from within a workflow.
+- Build the omes Go worker Docker image and deploy it as a pod on k8s, configured to poll the test cell. Do we have an implementation we can borrow from the fairness experiment?
+
+
+
+
+<details>
+<summary>Appendix: Comparison with fairness experiment (see commits by David Reiss)</summary>
+
+| | Fairness | SAA |
+|---|---|---|
+| **Treatments** | Same workload, two matcher modes | Two execution types (SAW vs SAA) |
+| **Quantity computed** | $\Delta C / C$ | Ratio $r_i = q_i(\text{SAA}) / q_i(\text{SAW})$ |
+| **Load shape** | Sinusoidal backlog (exercises matcher) | Steady-state at fixed start rate (our model assumes sync match) |
+| **What is measured** | CPU per service, Astra operation rates | CPU per service, memory per service, Astra operation rates by table and verb, WAL write rates, visibility write rates, RPC handling rates per service per method |
+| **Predictions to validate** | None — purely empirical | $r_\text{Cass} = 3/7$, $r_\text{WAL} = 3/14$, $r_\text{Vis} = 3/3$, per-method RPC rates matching proposal table |
+
+Fixed start rate (not fixed task throughput) because SAA and SAW generate different numbers of tasks per execution.
+
+**Question**: what is the incremental COGS of enabling the fairness matcher vs the classic matcher?
+
+**COGS components**: (1) Astra queries (~35% of total COGS), (2) EC2 compute (~9%, split across frontend+matching and history). Ignored: data transfer, Astra storage, non-AWS costs (Clickhouse <3%).
+
+**Setup**: dedicated test cell `s-oss-dnr-faircogs3` (64 partitions). Load generator: Omes Ebb and Flow — sinusoidal activity task backlog. 5 scenarios (classic, fairness with 0/1k/100k keys, priority), each 6 hours. Measured via [dedicated Grafana dashboard](https://grafana.tmprl-internal.cloud/d/df6pldpkiy1vka/faircogs).
+
+**Results**: Astra showed no significant increase. CPU increased up to 23% (frontend) and 36% (history) in the worst case (1k fairness keys). COGS impact: $(0.035 \times 0.23) + (0.057 \times 0.36) = 2.8\%$. Pricing council recommendation: price fairness on value to customer, not COGS.
+
+
+
+
+
+</details>
+
+<details>
+<summary>Appendix: possible experimental outcomes</summary>
+
+- **Analytical predictions confirmed, $R$ in predicted range.** Observed $r_\text{Cass}$, $r_\text{WAL}$, $r_\text{Vis}$, and per-method RPC rates match the analytical derivations. $r_\text{compute}$ lands in $[0.4, 1.0]$, giving $R$ in roughly $0.52$–$0.64$. We present $R$ with a tighter confidence interval than the proposal (because $r_\text{compute}$ is now estimated, not bounded).
+- **$r_\text{compute}$ is low, pushing $R$ toward 0.5.** If $r_\text{compute} \approx 0.4$ and analytical predictions hold, $R \approx 0.52$. Cannibalization is near-zero.
+- **Observed $r_i$ diverge from analytical predictions.** Some assumption is wrong (e.g. sync match doesn't hold at test load, or there are unaccounted persistence writes). We recompute $R$ using observed values and identify which assumption failed and whether it reflects production conditions or a test artifact.
+- **$R$ is higher than predicted.** $R > 0.64$ would mean worse cannibalization than estimated. Options: accept the margin reduction (bounded by ~3% SAW share), adjust billing, or identify engineering work to reduce SAA COGS.
+
+</details>
+
+END_DOCUMENT------------------------------------------------------------------------------
+
+
+Your task is to help me design and build the omes-based tooling that we will use to perform the experiments outlined above to learn about COGS of SAA and SAW. We are in the omes repo; study it carefully. Our work will broadly break into the following phases that we must design holistically:
+
+(1) Add any missing omes functionality that will be needed in order to be able to use omes to generate the SAA and SAW load for the experiments.
+(2) Run the experiments against the cloud cell that Stephen has prepared: its name is s-saa-cogs.
+
+Stephen linked to the 'scaffold' run that created the cell. I see it had the following input:
+
+{
+  "CellConfig": {
+    "Identity": {
+      "Location": {
+        "CloudProvider": "aws",
+        "AccountID": "124355634071",
+        "Region": "us-west-2"
+      },
+      "ID": "s-saa-cogs"
+    },
+    "Template": "v5-aws-dev",
+    "ServerVersion": "v3.151.9_oss1.31.0_151.6",
+    "AgentVersion": "v3.151.9_oss1.31.0_151.6",
+    "WebVersion": "v2.47.0",
+    "GoCanaryVersion": "v1.35.0",
+    "ComponentVersion": "v2026-03-20.00",
+    "WalVersion": "v10.0.3",
+    "EnableMetering": false
+  },
+  "FailurePolicy": 1
+}
+
+and output:
+
+{
+  "Cell": {
+    "Identity": {
+      "Location": {
+        "CloudProvider": "aws",
+        "AccountID": "124355634071",
+        "Region": "us-west-2"
+      },
+      "ID": "s-saa-cogs"
+    }
+  }
+}
+
+I am not familiar with performing operations against cloud cells, so you will need to research and help me during this. But we have several good resources: study the contents of the 'oncall' and 'runbooks' repos, and also use the /agent-slack skill. You also have Notion and Temporal Docs MCP. Use the more modern 'ct' rather than its alias 'omni'.
+
+Initial grafana dashboard JSON is at .task/saacogs.json.
+
+Important: I'd like an early aim to be to get an end-to-end proof-of-principle of this working. Therefore let's not make the omes component sophisticated initially; just the bare minimum to run an SAW and SAA workload. But I am a bit intimidated by doing anything with the cloud cell since I don't know how. So I guess one early aim is to be able to point our metrics dashboard at s-saa-cogs, and see idle state, then run one of our omes commands, and see activity increase in the dashboard. Please maintain a file of useful shell commands with terse comments where necessary. I will run them and show you the output. Don't do operations against cloud or observability yourself unless I explicitly ask you to.
+
+In the omes work, we must use the latest version of the SDK with Standalone Activity support, such that our code is consistent with what Temporal Docs (use MCP) and the samples-go repo show.
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
new file mode 120000
index 00000000..81744092
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1 @@
+.task/AGENTS.md
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 00000000..81744092
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+.task/AGENTS.md
\ No newline at end of file

From 3a35b44711c5a927f6566129859b9370654c3830 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Tue, 24 Mar 2026 16:34:23 -0400
Subject: [PATCH 28/40] Add some other dashboards

---
 .task/faircogs.json |   799 ++
 .task/history.json  | 27751 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 28550 insertions(+)
 create mode 100644 .task/faircogs.json
 create mode 100644 .task/history.json

diff --git a/.task/faircogs.json b/.task/faircogs.json
new file mode 100644
index 00000000..e11dd1da
--- /dev/null
+++ b/.task/faircogs.json
@@ -0,0 +1,799 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": 971,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 8,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(service_requests{cluster=\"s-oss-dnr-faircogs3\",temporal_service_type=\"matching\"}[1m]))",
+          "legendFormat": "Matching RPS",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Matching RPS",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": ["Value"],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": true
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(service_requests{cluster=\"s-oss-dnr-faircogs3\",temporal_service_type=\"history\"}[1m]))",
+          "legendFormat": "History RPS",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "History RPS",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "decbytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "adhocFilters": [],
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"s-oss-dnr-faircogs3\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"s-oss-dnr-faircogs3\",namespace=\"temporal\",workload=\"matching\",workload_type=\"deployment\"}))",
+          "format": "time_series",
+          "hide": false,
+          "interval": "",
+          "intervalFactor": 2,
+          "legendFormat": "p50",
+          "range": true,
+          "refId": "D",
+          "step": 10
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"s-oss-dnr-faircogs3\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"s-oss-dnr-faircogs3\",namespace=\"temporal\",workload=\"frontend\",workload_type=\"deployment\"}))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"s-oss-dnr-faircogs3\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"s-oss-dnr-faircogs3\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "p50 mem usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 24
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "adhocFilters": [],
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(cassandra_query{cluster=\"s-oss-dnr-faircogs3\"} [$__rate_interval])) + sum(rate(cassandra_batch{cluster=\"s-oss-dnr-faircogs3\"} [$__rate_interval]))",
+          "interval": "",
+          "key": "Q-348416b5-2a03-42f3-bdc8-5fbd4a2f6bcf-0",
+          "legendFormat": "rps",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "adhocFilters": [],
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(cassandra_query{cluster=\"s-oss-dnr-faircogs3\",table=\"tasks\"} [$__rate_interval])) + sum(rate(cassandra_batch{cluster=\"s-oss-dnr-faircogs3\",table=\"tasks\"} [$__rate_interval]))",
+          "hide": false,
+          "instant": false,
+          "interval": "",
+          "key": "Q-ca13377f-be45-41e0-af3b-4bc8861ee6fb-1",
+          "legendFormat": "tasks rps",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "(\n  sum(\n    rate(\n      cassandra_query{\n        cluster=\"s-oss-dnr-faircogs3\",\n        table=\"tasks_v2\"\n      }[$__rate_interval]\n    )\n  )\n+\n  sum(\n    rate(\n      cassandra_batch{\n        cluster=\"s-oss-dnr-faircogs3\",\n        table=\"tasks_v2\"\n      }[$__rate_interval]\n    )\n  )\n) OR on() vector(0)",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "tasks_v2 rps",
+          "range": true,
+          "refId": "C"
+        },
+        {
+          "datasource": {
+            "name": "Expression",
+            "type": "__expr__",
+            "uid": "__expr__"
+          },
+          "expression": "$B + $C",
+          "hide": false,
+          "refId": "tasks + tasks_v2 RPS",
+          "type": "math"
+        }
+      ],
+      "title": "Astra RPS",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 32
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "adhocFilters": [],
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "sum(rate(cassandra_query{cluster=\"s-oss-dnr-faircogs3\",table=\"tasks_v2\"} [$__rate_interval])) + sum(rate(cassandra_batch{cluster=\"s-oss-dnr-faircogs3\",table=\"tasks_v2\"} [$__rate_interval]))",
+          "hide": true,
+          "interval": "",
+          "key": "Q-348416b5-2a03-42f3-bdc8-5fbd4a2f6bcf-0",
+          "legendFormat": "rps",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "adhocFilters": [],
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "sum by (temporal_namespace, task_priority)(\n  rate(\n    approximate_backlog_count{\n      cluster=\"s-oss-dnr-faircogs3\",\n      temporal_service_type=\"matching\",\n      task_type=\"Activity\",\n      temporal_namespace=~\"faircogs.*\"\n    }[$__rate_interval]\n  )\n)\n",
+          "hide": true,
+          "instant": false,
+          "interval": "",
+          "key": "Q-ca13377f-be45-41e0-af3b-4bc8861ee6fb-1",
+          "legendFormat": "backlog count rate",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "name": "Expression",
+            "type": "__expr__",
+            "uid": "__expr__"
+          },
+          "expression": "$A / $B",
+          "hide": false,
+          "refId": "tasks_v2_divided_by_backlog_size",
+          "type": "math"
+        }
+      ],
+      "title": "tasks_v2 RPS vs backlog increase rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": ["matching avg CPU"],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": true
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 40
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "adhocFilters": [],
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "avg(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"s-oss-dnr-faircogs3\", pod=~\"matching-.*\"}) by (node))",
+          "hide": false,
+          "instant": false,
+          "interval": "",
+          "legendFormat": "matching avg CPU",
+          "range": true,
+          "refId": "F"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "avg(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"s-oss-dnr-faircogs3\", pod=~\"frontend-.*\"}) by (node))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "frontend avg CPU",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "ef667c0e-d08c-4b40-9761-479514828632"
+          },
+          "editorMode": "code",
+          "expr": "avg(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"s-oss-dnr-faircogs3\", pod=~\"history-.*\"}) by (node))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "history avg CPU",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Avg CPU usage",
+      "type": "timeseries"
+    }
+  ],
+  "preload": false,
+  "schemaVersion": 40,
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-30m",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "utc",
+  "title": "faircogs",
+  "uid": "df6pldpkiy1vka",
+  "version": 31,
+  "weekStart": ""
+}
diff --git a/.task/history.json b/.task/history.json
new file mode 100644
index 00000000..b4c89407
--- /dev/null
+++ b/.task/history.json
@@ -0,0 +1,27751 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "loki",
+          "uid": "e008932a-e9dc-4b7a-819f-68b662f3dc51"
+        },
+        "enable": true,
+        "expr": "{cluster=\"newton\",k8s_app=\"cell-worker\"} |= \"deploying temporal service\" | pattern `<time>\t<level>\t<code>\t<msg>\t<j>` | line_format \"{{.j}}\" | json | CellID = \"$cluster\" | Service = \"history\" | line_format \"history deployment started\" | label_format Attempt=Attempt,ComponentTag=ComponentTag,Namespace=Namespace,ImageRepo=Params_ImageRepo,ImageTag=Params_ImageTag,Params_Replicas=Params_Replicas,RunID=RunID,Service=Service,TaskQueue=TaskQueue,WorkerID=WorkerID,WorkflowID=WorkflowID,WorkflowType=WorkflowType,cluster=cluster,code=code,env=env,j=j,k8s_app=k8s_app,k8s_container=k8s_container,k8s_namespace=k8s_namespace,k8s_node_name=k8s_node_name,k8s_pod=k8s_pod,level=level",
+        "hide": false,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "instant": false,
+        "name": "History Deployment",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": 520,
+  "links": [],
+  "panels": [
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 341,
+      "panels": [],
+      "title": "Kubernetes",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "max": 1,
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "percentunit"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 1
+      },
+      "id": 340,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(1.0, sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node) / on (node) machine_cpu_cores{cluster=\"$cluster\"})",
+          "hide": true,
+          "instant": false,
+          "legendFormat": "p100",
+          "range": true,
+          "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.99, sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node) / on (node) machine_cpu_cores{cluster=\"$cluster\"})",
+          "hide": true,
+          "instant": false,
+          "legendFormat": "p99",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.95, sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node) / on (node) machine_cpu_cores{cluster=\"$cluster\"})",
+          "hide": true,
+          "instant": false,
+          "legendFormat": "p95",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.50, sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node) / on (node) machine_cpu_cores{cluster=\"$cluster\"})",
+          "hide": true,
+          "instant": false,
+          "legendFormat": "p50",
+          "range": true,
+          "refId": "C"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "1 - min(\n  label_replace(avg without (cpu) (rate(node_cpu_seconds_total{cluster=\"$cluster\",mode=\"idle\"}[$rate])), \n    \"node\", \"$1\", \n    \"instance\", \"(.*)\") \n  * on (node) group_left (label_k8s_tmprl_cloud_history) \n    max by (node) (kube_node_labels{cluster=\"$cluster\",label_k8s_tmprl_cloud_history=\"true\"})   \n)",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "max",
+          "range": true,
+          "refId": "E"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "1 - max(\n  label_replace(avg without (cpu) (rate(node_cpu_seconds_total{cluster=\"$cluster\",mode=\"idle\"}[$rate])), \n    \"node\", \"$1\", \n    \"instance\", \"(.*)\") \n  * on (node) group_left (label_k8s_tmprl_cloud_history) \n    max by (node) (kube_node_labels{cluster=\"$cluster\",label_k8s_tmprl_cloud_history=\"true\"})   \n)",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "min",
+          "range": true,
+          "refId": "F"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "1 - avg(\n  label_replace(avg without (cpu) (rate(node_cpu_seconds_total{cluster=\"$cluster\",mode=\"idle\"}[$rate])), \n    \"node\", \"$1\", \n    \"instance\", \"(.*)\") \n  * on (node) group_left (label_k8s_tmprl_cloud_history) \n    max by (node) (kube_node_labels{cluster=\"$cluster\",label_k8s_tmprl_cloud_history=\"true\"})   \n)",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "avg",
+          "range": true,
+          "refId": "G"
+        }
+      ],
+      "title": "Total node CPU Usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "Summed by node, so if multiple history pods exist on a node their CPU usage will be summed.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 8,
+        "y": 1
+      },
+      "id": 388,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "max(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node))",
+          "instant": false,
+          "legendFormat": "max",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "min(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "min",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "avg(sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", pod=~\"history-.*\"}) by (node))",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "avg",
+          "range": true,
+          "refId": "C"
+        }
+      ],
+      "title": "History Service CPU usage",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "$datasource"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "bytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 16,
+        "y": 1
+      },
+      "id": 339,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(1.0, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "format": "time_series",
+          "hide": false,
+          "intervalFactor": 2,
+          "legendFormat": "P100",
+          "range": true,
+          "refId": "C",
+          "step": 10
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.99, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "p99",
+          "range": true,
+          "refId": "A",
+          "step": 10
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.95, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "format": "time_series",
+          "hide": false,
+          "intervalFactor": 2,
+          "legendFormat": "p95",
+          "range": true,
+          "refId": "B",
+          "step": 10
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "quantile(0.5, sum by (pod) (container_memory_working_set_bytes{cluster=\"$cluster\",container!=\"\",image!=\"\",namespace=\"temporal\"} * on (namespace, pod) group_left (workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"temporal\",workload=\"history\",workload_type=\"deployment\"}))",
+          "format": "time_series",
+          "hide": false,
+          "intervalFactor": 2,
+          "legendFormat": "p50",
+          "range": true,
+          "refId": "D",
+          "step": 10
+        }
+      ],
+      "title": "Memory Usage",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 9
+      },
+      "id": 7,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 10
+          },
+          "id": 2,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.13.0",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, operation, cluster,\n    sum(events_count) / greatest($__interval_s, 30) AS rps\nFROM global_metric_service_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='service_latency'\n    AND cluster='$cluster'\n    AND service='history'\n    -- AND namespace='ns.account'\nGROUP BY time, metric, operation, cluster\nORDER BY time\n",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum by (operation) (rate(service_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:request_per_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(service_errors{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:service_error:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "errors",
+              "refId": "B"
+            }
+          ],
+          "title": "Requests VS Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 10
+          },
+          "id": 4,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (error_type) (rate(service_error_with_type{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:history_service_error_with_type:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{ error_type }}",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "Errors Break Down",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 19
+          },
+          "id": 263,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp_interval) as time,\n    operation,\n    sum(events_sum) / sum(events_count)/1000 as avg\nFROM global_metric_service_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='service_latency'\n    AND cluster = '$cluster'\n    -- AND namespace='ns.account'\n    AND service = 'history'\nGROUP BY time, operation\n-- , namespace\nORDER BY time",
+              "refId": "chronicle-query-Ytvm"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum by (operation) (rate(service_latency_sum{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(service_latency_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:history_service_latency_by_operation:histogram_quantile_1m{cluster=\"$cluster\", quantile=\"0.5\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 19
+          },
+          "id": 357,
+          "options": {
+            "legend": {
+              "calcs": ["max", "last"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Max",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (resource_exhausted_scope,resource_exhausted_cause,temporal_namespace) (rate(service_errors_resource_exhausted{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{resource_exhausted_scope}}_{{resource_exhausted_cause}}_{{temporal_namespace}}",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "Resource Exhausted",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 260
+          },
+          "id": 264,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.12.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp_interval) as time,\n    operation,\n    quantileInterpolatedWeighted(${quantile})(bucket, events_count)/1000 AS \"p$quantile\"\nFROM global_metric_service_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='service_latency'\n    AND cluster = '$cluster'\n    -- AND namespace='ns.account'\n    AND service = 'history'\n    AND operation IN ('StartWorkflowExecution','SignalWorkflowExecution','SignalWithStartWorkflowExecution','RespondWorkflowTaskCompleted','RespondActivityTaskCompleted','StartActivityExecution')\nGROUP BY time, operation\n-- , namespace\nORDER BY time\n",
+              "refId": "chronicle-query-FofP"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(service_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:history_service_latency_by_operation:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\",operation=~\"StartWorkflowExecution|SignalWorkflowExecution|SignalWithStartWorkflowExecution|RespondWorkflowTaskCompleted|RespondActivityTaskCompleted|StartActivityExecution\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "$quantile Latency Key APIs",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 260
+          },
+          "id": 355,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": ["last", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "desc"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp_interval) as time,\n    operation,\n    quantileInterpolatedWeighted(${quantile})(bucket, events_count)/1000 AS \"p$quantile\"\nFROM global_metric_service_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='service_latency'\n    AND cluster = '$cluster'\n    -- AND namespace='ns.account'\n    AND service = 'history'\nGROUP BY time, operation\n-- , namespace\nORDER BY time",
+              "refId": "chronicle-query-mrjB"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(service_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:history_service_latency_by_operation:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "$quantile Latency",
+          "type": "timeseries"
+        }
+      ],
+      "title": "History Service",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 10
+      },
+      "id": 351,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.action_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 9284
+          },
+          "id": 350,
+          "options": {
+            "legend": {
+              "calcs": ["lastNotNull", "mean", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Mean",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(saas_actions{cluster=\"$cluster\",failed_with_system_error=\"false\",namespace_mode=\"active\"}[$rate]))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{action_type}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.8.2",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    sum(events_sum) / greatest($__interval_s, 30) as aps\nFROM global_metric_saas_actions_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    AND failed_with_system_error = 'false'\n    AND namespace_mode = 'active'\nGROUP BY time\nORDER BY time\n",
+              "refId": "B"
+            }
+          ],
+          "title": "Total APS",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.namespace}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "__systemRef": "hideSeriesFrom",
+                "matcher": {
+                  "id": "byNames",
+                  "options": {
+                    "mode": "exclude",
+                    "names": ["hbl-gcp-imps-in-out-perf.ijt0s"],
+                    "prefix": "All except:",
+                    "readOnly": true
+                  }
+                },
+                "properties": [
+                  {
+                    "id": "custom.hideFrom",
+                    "value": {
+                      "legend": false,
+                      "tooltip": false,
+                      "viz": true
+                    }
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 9284
+          },
+          "id": 393,
+          "options": {
+            "legend": {
+              "calcs": ["lastNotNull", "mean", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Mean",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(saas_actions{cluster=\"$cluster\",failed_with_system_error=\"false\"}[$rate])) by (temporal_namespace)",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.8.2",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    namespace,\n    sum(events_sum) / greatest($__interval_s, 30) as aps\nFROM global_metric_saas_actions_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '${cluster}'\n    AND failed_with_system_error = 'false'\n    AND namespace_mode = 'active'\nGROUP BY time, namespace\nORDER BY time\n",
+              "refId": "B"
+            }
+          ],
+          "title": "Actions By Namespace",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.action_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 9293
+          },
+          "id": 394,
+          "options": {
+            "legend": {
+              "calcs": ["lastNotNull", "mean", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Mean",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(saas_actions{cluster=\"$cluster\",failed_with_system_error=\"false\"}[$rate])) by (action_type)",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{action_type}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.8.2",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    action_type,\n    sum(events_sum) / greatest($__interval_s, 30) as aps\nFROM global_metric_saas_actions_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    AND failed_with_system_error = 'false'\n    AND namespace_mode = 'active'\nGROUP BY time, action_type\nORDER BY time\n",
+              "refId": "B"
+            }
+          ],
+          "title": "Actions By Type",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.namespace}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 9293
+          },
+          "id": 391,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(saas_actions_throttled{cluster=\"$cluster\"}[$rate])) by (temporal_namespace)",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.8.2",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    namespace,\n    sum(events_sum) / greatest($__interval_s, 30) as aps\nFROM global_metric_saas_actions_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '${cluster}'\n    AND metric = 'saas_actions_throttled'\nGROUP BY time, namespace\nORDER BY time\n",
+              "refId": "B"
+            }
+          ],
+          "title": "Throttled Actions By Namespace",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.action_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 9302
+          },
+          "id": 349,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(saas_actions_throttled{cluster=\"$cluster\"}[$rate])) by (action_type)",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{action_type}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.8.2",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    action_type,\n    sum(events_sum) / greatest($__interval_s, 30) as aps\nFROM global_metric_saas_actions_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '${cluster}'\n    AND metric = 'saas_actions_throttled'\nGROUP BY time, action_type\nORDER BY time\n",
+              "refId": "B"
+            }
+          ],
+          "title": "Throttled Actions By Type",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Actions",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 11
+      },
+      "id": 67,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 18398
+          },
+          "id": 65,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(acquire_shards_count{cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:acquire_shards_count:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "acquire",
+              "refId": "A"
+            }
+          ],
+          "title": "Acquire Shards Count",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 18398
+          },
+          "id": 69,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#histogram_quantile(0.95, sum by (le) (rate(acquire_shards_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:acquire_shards_latency:histogram_quantile{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "acquire_latency",
+              "refId": "A"
+            }
+          ],
+          "title": "P95 Acquire Shards Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 18406
+          },
+          "id": 71,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(shard_closed_count{cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:shard_closed_count:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "shards_closed",
+              "refId": "A"
+            }
+          ],
+          "title": "Shards Closed",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 18406
+          },
+          "id": 73,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(sharditem_created_count{cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:sharditem_created_count:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "item_created",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(sharditem_removed_count{cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:sharditem_removed_count:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "item_removed",
+              "refId": "B"
+            }
+          ],
+          "title": "Shard Item Counts",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 18414
+          },
+          "id": 199,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(lock_latency_sum{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])) / sum(rate(lock_latency_count{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "Avg",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "histogram_quantile(0.95, sum by (le) (rate(lock_latency_bucket{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P95",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(lock_latency_bucket{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P99",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "histogram_quantile(1, sum by (le) (rate(lock_latency_bucket{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P100",
+              "refId": "F"
+            }
+          ],
+          "title": "P* Shard Lock Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 18414
+          },
+          "id": 200,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(replication_tasks_lag_sum{cluster=\"$cluster\",operation=\"ReplicatorQueueProcessor\",temporal_service_type=\"history\"}[$rate])) / sum(rate(replication_tasks_lag_count{cluster=\"$cluster\",operation=\"ReplicatorQueueProcessor\",temporal_service_type=\"history\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "Avg replication tasks lag",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.95, sum by (le) (rate(replication_tasks_lag_bucket{cluster=\"$cluster\",operation=\"ReplicatorQueueProcessor\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P95 replication tasks lag",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(replication_tasks_lag_bucket{cluster=\"$cluster\",operation=\"ReplicatorQueueProcessor\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P99 replication tasks lag",
+              "refId": "C"
+            }
+          ],
+          "title": "P* Replication Lag",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 18422
+          },
+          "id": 336,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(rate(semaphore_latency_sum{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])) / sum(rate(semaphore_latency_count{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "Avg",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.95, sum by (le) (rate(semaphore_latency_bucket{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P95",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(semaphore_latency_bucket{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P99",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(1, sum by (le) (rate(semaphore_latency_bucket{cluster=\"$cluster\",operation=\"ShardInfo\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P100",
+              "range": true,
+              "refId": "F"
+            }
+          ],
+          "title": "P* Shard Semaphore Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 18422
+          },
+          "id": 332,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.50, sum(rate(persistence_shard_rps_bucket{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval])) by (le))",
+              "instant": false,
+              "legendFormat": "P50",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.95, sum(rate(persistence_shard_rps_bucket{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval])) by (le))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "P95",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum(rate(persistence_shard_rps_bucket{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval])) by (le))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "P99",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1.00, sum(rate(persistence_shard_rps_bucket{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval])) by (le))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "P100",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": false,
+              "expr": "sum(rate(persistence_shard_rps_sum{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval])) / sum(rate(persistence_shard_rps_count{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Average",
+              "range": true,
+              "refId": "E"
+            }
+          ],
+          "title": "P* Shard RPS",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Shard Controller",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 12
+      },
+      "id": 204,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 15069
+          },
+          "id": 219,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(schedule_to_start_timeout{cluster=\"$cluster\",operation=\"TimerActiveTaskActivityTimeout\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:schedule_to_start_timeout_non_sticky:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "refId": "A"
+            }
+          ],
+          "title": "ActivityTask ScheduleToStart Timeout",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 15069
+          },
+          "id": 218,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(schedule_to_start_timeout{cluster=\"$cluster\",operation=\"TimerActiveTaskWorkflowTaskTimeout\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:schedule_to_start_timeout_sticky:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "refId": "A"
+            }
+          ],
+          "title": "WorkflowTask ScheduleToStart Timeout (sticky)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 15078
+          },
+          "id": 220,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(start_to_close_timeout{cluster=\"$cluster\",operation=\"TimerActiveTaskActivityTimeout\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:activity_task_starttoclose_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "refId": "A"
+            }
+          ],
+          "title": "ActivityTask StartToClose Timeout",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 15078
+          },
+          "id": 221,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(start_to_close_timeout{cluster=\"$cluster\",operation=\"TimerActiveTaskWorkflowTaskTimeout\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflowtask_starttoclose_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "WorkflowTask StartToClose Timeout",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 15201
+          },
+          "id": 205,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(schedule_to_close_timeout{cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:activity_scheduletoclose_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "refId": "A"
+            }
+          ],
+          "title": "Activity ScheduleToClose Timeout",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 15201
+          },
+          "id": 353,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(start_to_close_timeout{cluster=\"$cluster\",operation=\"TimerActiveTaskSpeculativeWorkflowTaskTimeout\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:speculative_workflowtask_starttoclose_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Speculative WorkflowTask StartToClose Timeout",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.namespace}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 15210
+          },
+          "id": 337,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "# Disabling this query since we'll be removing temporal_namespace from this timeseries imminently (2024-05-09)\n# Use the clickhouse query below instead to look into it at a namespace level\nhistogram_quantile($quantile, sum by (temporal_namespace, le) (rate(task_schedule_to_start_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.0.6",
+              "queryType": "timeseries",
+              "rawSql": "/*\n  Uncomment the namespace in the WHERE clause to filter to a specific namespace.\n*/\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as latency_ms\nFROM global_metric_schedule_to_start_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  /* AND namespace='my_namespace.account_id' */\ngroup by\n  time, namespace\norder by time asc",
+              "refId": "B"
+            }
+          ],
+          "title": "$quantile Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 15210
+          },
+          "id": 222,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (temporal_namespace) (rate(heartbeat_timeout{cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:activity_heartbeat_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{temporal_namespace}}",
+              "refId": "A"
+            }
+          ],
+          "title": "Activity Heartbeat Timeout",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Task Timeouts",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 13
+      },
+      "id": 55,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4222
+          },
+          "id": 52,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_requests{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_tasks:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "requests",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TransferActive%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks_error:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "errors",
+              "refId": "C"
+            }
+          ],
+          "title": "Active Transfer Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4222
+          },
+          "id": 185,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_tasks_discarded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "discarded",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_limit_exceeded_counter{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks_limit_exceeded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:active_tasks_workflow_busy:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate])) by (resource_exhausted_cause)\n# use the raw query above if needed\n# temporal:v1:active_tasks_workflow_throttled:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "throttled - {{resource_exhausted_cause}}",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_not_active_counter{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_not_active",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_dependency_task_not_completed{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "dependency_task_not_completed",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_namespace_handover{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_handover",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Active Transfer Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5296
+          },
+          "id": 53,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_tasks_by_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TransferActive%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Active Transfer Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5296
+          },
+          "id": 187,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_tasks_error_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Active Transfer Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5305
+          },
+          "id": 62,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    cluster, task_type, /* namespace, */\n    sum(events_sum) / sum(events_count) as avg\nFROM global_metric_task_attempt_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_attempt'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND task_type LIKE 'TransferActive%'\nGROUP BY time, cluster, task_type /*, namespace*/\nORDER BY time\n",
+              "refId": "chronicle-query-2DqS"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (operation, le) (rate(task_attempt_sum{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate])) / sum by (operation, le) (rate(task_attempt_count{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:avg_active_tasks:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Transfer Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5305
+          },
+          "id": 211,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  quantileInterpolatedWeighted(${quantile})(bucket, events_count) AS \"p$quantile\"\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'TransferActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-e0FB"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_attempt_bucket{cluster=\"$cluster\",operation=~\"TransferActive.*\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_attempt:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Transfer Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5314
+          },
+          "id": 59,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TransferActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-eyzl"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_active_tasks_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Transfer Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5314
+          },
+          "id": 132,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time\n",
+              "refId": "chronicle-query-SDG4"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_latency_processing:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Transfer Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and latency and backoff for attempts that failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5323
+          },
+          "id": 133,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TransferActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-DkuH"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_sum{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_count{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_active_tasks_overall_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Transfer Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and latency and backoff for attempts that failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5323
+          },
+          "id": 223,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-fgRh"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Transfer Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5332
+          },
+          "id": 134,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#(sum by (operation) (rate(task_latency_queue_sum{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_queue_count{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))*1000\n# use the raw query above if needed\n# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\ntemporal:v0:avg_active_tasks_e2e_latency:rate1m{cluster=\"$cluster\"}*1000",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  operation, /* namespace, */\n  (sum(sum_total_duration_ms)\n  /\n  sum(sum_bucket_events_count) ) as value_ms\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferActive%'\n  AND metric='task_latency_queue'\n  /* \n  AND namespace='REPLACE_ME'\n  */\nGROUP BY \n  time, operation\n  /*, namespace*/\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Avg Active Transfer Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5332
+          },
+          "id": 135,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))*1000\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_e2e_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}*1000",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) AS time,\n  operation, /* namespace, */\n  quantileInterpolatedWeighted(${quantile})(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferActive%'\n  AND metric='task_latency_queue'\n  /* \n  AND namespace='REPLACE_ME'\n  */\nGROUP BY\n  time, operation\n  /*, namespace*/\nORDER BY\n  time ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Percentile $quantile Active Transfer Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5341
+          },
+          "id": 291,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_schedule'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TransferActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-J9Dw"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_latency_schedule_sum{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_schedule_count{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_schedule_latency:histogram_quantile_1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Transfer Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5341
+          },
+          "id": 292,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_schedule'\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-R5vN"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_schedule_bucket{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_schedule_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Transfer Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5350
+          },
+          "id": 293,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_load'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TransferActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-k3TW"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_latency_load_sum{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_load_count{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_load_latency:histogram_quantile_1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Transfer Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5350
+          },
+          "id": 294,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_load'\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-O0Uc"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_load_bucket{cluster=\"$cluster\",operation=~\"TransferActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_transfer_task_load_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Transfer Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5359
+          },
+          "id": 136,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.5, sum by (operation, le) (rate(shardinfo_immediate_queue_lag_bucket{cluster=\"$cluster\",task_category=\"transfer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:transfer_queue_lag_distribution:histogram_quantile_1m{quantile=\"0.5\", cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "p50",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.7, sum by (operation, le) (rate(shardinfo_immediate_queue_lag_bucket{cluster=\"$cluster\",task_category=\"transfer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:transfer_queue_lag_distribution:histogram_quantile_1m{quantile=\"0.7\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p70",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.9, sum by (operation, le) (rate(shardinfo_immediate_queue_lag_bucket{cluster=\"$cluster\",task_category=\"transfer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:transfer_queue_lag_distribution:histogram_quantile_1m{quantile=\"0.9\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p90",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.99, sum by (operation, le) (rate(shardinfo_immediate_queue_lag_bucket{cluster=\"$cluster\",task_category=\"transfer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:transfer_queue_lag_distribution:histogram_quantile_1m{quantile=\"0.99\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p99",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Transfer Queue Lag distribution",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "violations / second",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5359
+          },
+          "id": 323,
+          "links": [
+            {
+              "targetBlank": true,
+              "title": "Workflow Size Limits Monitoring Documentation",
+              "url": "https://www.notion.so/temporalio/Workflow-Size-Limits-877a2b2d4f74457d9533f0f976ca442a"
+            }
+          ],
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum(rate(wf_too_many_pending_child_workflows{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:wf_size_limit_violations_pending_child_workflows:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "Pending Child Workflows",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum(rate(wf_too_many_pending_activities{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:wf_size_limit_violations_pending_activities:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "Pending Activities",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum(rate(wf_too_many_pending_cancel_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:wf_size_limit_violations_pending_cancel_requests:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "Pending Cancel Requests",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum(rate(wf_too_many_pending_external_workflow_signals{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:wf_size_limit_violations_pending_external_workflow_signals:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "Pending Outgoing Signals",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "Workflow Size Limit Violations",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Transfer Active Tasks Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 14
+      },
+      "id": 151,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 10136
+          },
+          "id": 154,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_requests{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "requests",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TransferStandby%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:transfer_passive_tasks_errors:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "errors",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Passive Transfer Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 10136
+          },
+          "id": 60,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks_discarded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "discarded",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_limit_exceeded_counter{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:transfer_passive_tasks_error_limit_exceeded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:transfer_passive_tasks_error_workflow_busy:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:transfer_passive_tasks_error_throttled:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "throttled",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "Passive Transfer Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 10181
+          },
+          "id": 156,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks_by_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TransferStandby%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation\n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Passive Transfer Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 10181
+          },
+          "id": 57,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks_error_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Passive Transfer Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 24,
+            "x": 0,
+            "y": 10222
+          },
+          "id": 168,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"TransferStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_tasks_discarded_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Passive Transfer Task Discarded",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and latency and backoff for attempts that failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 10231
+          },
+          "id": 345,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferStandby%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-FB6j"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by(operation, le) (rate(task_latency_bucket{operation=~\"TransferStandby.*\", temporal_service_type=\"history\", cluster=\"$cluster\"}[$rate])))",
+              "hide": true,
+              "instant": true,
+              "interval": "",
+              "key": "Q-b3321027-a9ec-4f1c-82ff-fc18e2be8bb8-0",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Transfer Standby Task Latency P99",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 10231
+          },
+          "id": 346,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\nhistogram_quantile(0.99, sum by(operation, le) (rate(task_latency_queue_bucket{operation=~\"TransferStandby.*\", temporal_service_type=\"history\", cluster=\"$cluster\"}[$rate])))*1000",
+              "hide": true,
+              "instant": true,
+              "interval": "",
+              "key": "Q-b3321027-a9ec-4f1c-82ff-fc18e2be8bb8-0",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) AS time,\n  operation, /* namespace, */\n  quantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_latency_queue'\n  AND operation LIKE 'TransferStandby%'\n  /* \n  AND namespace='prod.infra'\n  */\nGROUP BY\n  time, operation, cluster\n  /*, namespace*/\nORDER BY\n  time ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Transfer Standby Task Queue Latency P99",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 10239
+          },
+          "id": 158,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TransferStandby%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time\n",
+              "refId": "chronicle-query-dkNn"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"TransferStandby.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"TransferStandby.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_passive_tasks_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Passive Transfer Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 10239
+          },
+          "id": 159,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'TransferStandby%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time\n",
+              "refId": "chronicle-query-EnmI"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"TransferStandby.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:transfer_passive_task_latency_processing:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Passive Transfer Task Processing Latency",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Transfer Passive Task Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 15
+      },
+      "id": 84,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4224
+          },
+          "id": 76,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_requests{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_timer:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "requests",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TimerActive%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# sum(rate(task_errors{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_timer_error:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "errors",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Active Timer Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4224
+          },
+          "id": 82,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_timer_discarded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "discarded",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_limit_exceeded_counter{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_timer_error_limit_exceeded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "limit_exceeded",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:timer_active_tasks_workflow_busy:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate])) by (resource_exhausted_cause)\n# the raw query above is active; the recording rule below is commented out\n# temporal:v1:timer_active_tasks_workflow_throttled:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "throttled - {{resource_exhausted_cause}}",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_not_active_counter{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_not_active",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_dependency_task_not_completed{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "dependency_task_not_completed",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_namespace_handover{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_handover",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Active Timer Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5217
+          },
+          "id": 78,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_timer_by_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TimerActive%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation\n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Active Timer Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5217
+          },
+          "id": 164,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:active_timer_errors_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Active Timer Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5226
+          },
+          "id": 212,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  sum(events_sum) / sum(events_count) as avg\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'TimerActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-uJlX"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (operation, le) (rate(task_attempt_sum{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate])) / sum by (operation, le) (rate(task_attempt_count{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:avg_active_timer:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Timer Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5226
+          },
+          "id": 213,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  quantileInterpolatedWeighted(${quantile})(bucket, events_count) AS \"p$quantile\"\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'TimerActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-s05B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_attempt_bucket{cluster=\"$cluster\",operation=~\"TimerActive.*\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_attempt:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Timer Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5235
+          },
+          "id": 86,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TimerActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-06Yt"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_active_timer_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Timer Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5235
+          },
+          "id": 140,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'TimerActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-GIMU"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_latency_processing:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Timer Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock, latency and backoff for attempts failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5244
+          },
+          "id": 141,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TimerActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-9KJ9"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_sum{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_count{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_active_timer_overall_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Timer Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock, latency and backoff for attempts failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5244
+          },
+          "id": 142,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'TimerActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-JM8H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Timer Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5253
+          },
+          "id": 146,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#(sum by (operation) (rate(task_latency_queue_sum{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_queue_count{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\n# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\ntemporal:v0:avg_active_timer_e2e_latency:rate1m{cluster=\"$cluster\"}*1000",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": " /* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n\t$__timeInterval(timestamp_interval) as time,\n\toperation, /* namespace, */\n\t(sum(sum_total_duration_ms)\n\t/\n\tsum(sum_bucket_events_count)) as value_ms\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND cluster='$cluster'\n\tAND operation LIKE 'TimerActive%'\n\tAND metric='task_latency_queue'\n\t/* AND namespace='REPLACE_ME' */\nGROUP BY \n\ttime, operation\n\t/*, namespace*/\nORDER BY \n\ttime ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Avg Active Timer Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5253
+          },
+          "id": 145,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))*1000\n# use the raw query above if needed\n# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\ntemporal:v1:active_timer_task_e2e_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}*1000",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n\t$__timeInterval(timestamp_interval) AS time,\n\toperation, /* namespace, */\n\tquantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND metric='task_latency_queue'\n\tAND cluster='$cluster'\n\tAND operation LIKE 'TimerActive%'\n\t/*\n\tAND namespace='REPLACE_ME'\n\t*/\nGROUP BY\n\ttime, operation\n\t/*, namespace*/\nORDER BY\n\ttime ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Percentile $quantile Active Timer Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5262
+          },
+          "id": 295,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count)\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_schedule'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TimerActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-5d7k"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_latency_schedule_sum{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_schedule_count{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_schedule_latency:histogram_quantile_1m{cluster=\"$cluster\", quantile=\"0.5\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Timer Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5262
+          },
+          "id": 296,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_schedule'\n  AND cluster='$cluster'\n  AND operation LIKE 'TimerActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-5ur2"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_schedule_bucket{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_schedule_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Timer Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5271
+          },
+          "id": 297,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count)\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_load'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TimerActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-F2JG"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_latency_load_sum{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_load_count{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_load_latency:histogram_quantile_1m{cluster=\"$cluster\", quantile=\"0.5\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Timer Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5271
+          },
+          "id": 298,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_load'\n  AND cluster='$cluster'\n  AND operation LIKE 'TimerActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-lgBV"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_load_bucket{cluster=\"$cluster\",operation=~\"TimerActive.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:active_timer_task_load_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Timer Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5280
+          },
+          "id": 325,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum by (operation) (rate(shardinfo_scheduled_queue_lag_sum{cluster=\"$cluster\",task_category=\"timer\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(shardinfo_scheduled_queue_lag_count{cluster=\"$cluster\",task_category=\"timer\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\n#temporal:v1:timer_queue_lag_distribution:histogram_quantile_1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "avg",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.7, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"timer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\n#temporal:v1:timer_queue_lag_distribution:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p70",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.9, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"timer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\n#temporal:v1:timer_queue_lag_distribution:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p90",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"timer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\n#temporal:v1:timer_queue_lag_distribution:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p99",
+              "range": true,
+              "refId": "G"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(1, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"timer\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\n#temporal:v1:timer_queue_lag_distribution:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p100",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Timer Queue Lag distribution",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5280
+          },
+          "id": 92,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(schedule_to_start_timeout{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:schedule_to_start_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "schedule_to_start_timeout",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(start_to_close_timeout{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:start_to_close_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "start_to_close_timeout",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(schedule_to_close_timeout{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:schedule_to_close_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "schedule_to_close_timeout",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(heartbeat_timeout{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:heartbeat_timeout:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "heartbeat_timeout",
+              "refId": "D"
+            }
+          ],
+          "title": "Timeout Type",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Timer Active Tasks Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 16
+      },
+      "id": 153,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 16432
+          },
+          "id": 161,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_requests{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_timer:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "requests",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TimerStandby%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:timer_passive_tasks_errors:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "errors",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Passive Timer Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 16432
+          },
+          "id": 162,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_timer_discarded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "discarded",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_limit_exceeded_counter{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:timer_passive_tasks_error_limit_exceeded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:timer_passive_tasks_error_workflow_busy:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporval:v1:timer_passive_tasks_error_throttled:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "throttled",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "Passive Timer Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 16441
+          },
+          "id": 163,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_timer_by_op:rate1m{cluster=\"$cluster\"} > 0",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TimerStandby%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation\n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Passive Timer Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 16441
+          },
+          "id": 80,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_timer_error_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Passive Timer Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 24,
+            "x": 0,
+            "y": 16450
+          },
+          "id": 169,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"TimerStandby.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:passive_timer_discarded_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Passive Timer Task Discarded",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and the latency and backoff of attempts that failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 16459
+          },
+          "id": 343,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'TimerStandby%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-ihqF"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"TimerStandby.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "instant": true,
+              "interval": "",
+              "key": "Q-87110a13-c6b8-4059-8107-cec714e83be8-0",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Task Latency p99",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 16459
+          },
+          "id": 344,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"TimerStandby.*\",temporal_service_type=\"history\"}[$rate])))*1000",
+              "hide": true,
+              "instant": true,
+              "interval": "",
+              "key": "Q-29a315cb-ded7-4d3a-8139-f5a50c04eada-0",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n\t$__timeInterval(timestamp_interval) AS time,\n\toperation, /* namespace, */\n\tquantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND cluster='$cluster'\n\tAND metric='task_latency_queue'\n\tAND operation LIKE 'TimerStandby%'\n\t/*\n\tAND namespace='REPLACE_ME'\n\t*/\nGROUP BY\n\ttime, operation\n\t/*, namespace*/\nORDER BY\n\ttime ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Task Latency Queue p99",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 16467
+          },
+          "id": 165,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'TimerStandby%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-lt6s"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"TimerStandby.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"TimerStandby.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_passive_timer_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Passive Timer Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 16467
+          },
+          "id": 166,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'TimerStandby%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-Emp5"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"TimerStandby.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:timer_passive_task_latency_processing:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Passive Timer Task Processing Latency",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Timer Passive Tasks Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 17
+      },
+      "id": 385,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 4226
+          },
+          "id": 383,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_requests{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": true,
+              "instant": false,
+              "legendFormat": "requests",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 1,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "table",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'OutboundActive%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "errors",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Active Outbound Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 4226
+          },
+          "id": 384,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "instant": false,
+              "legendFormat": "discarded",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate])) by (resource_exhausted_cause)",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "throttled - {{resource_exhausted_cause}}",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_not_active_counter{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_not_active",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_dependency_task_not_completed{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "dependency_task_not_completed",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_namespace_handover{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_handover",
+              "range": true,
+              "refId": "F"
+            }
+          ],
+          "title": "Active Outbound Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4421
+          },
+          "id": 381,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'OutboundActive%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Active Outbound Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4421
+          },
+          "id": 382,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Active Outbound Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4430
+          },
+          "id": 379,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  sum(events_sum) / sum(events_count) as avg\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-UIn5"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum by (operation, le) (rate(task_attempt_sum{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate])) / sum by (operation, le) (rate(task_attempt_count{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate]))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Outbound Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4430
+          },
+          "id": 380,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  quantileInterpolatedWeighted(${quantile})(bucket, events_count) AS \"p$quantile\"\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-E9kH"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_attempt_bucket{cluster=\"$cluster\",operation=~\"OutboundActive.*\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Outbound Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4439
+          },
+          "id": 377,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count)\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'OutboundActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-DECd"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Outbound Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4439
+          },
+          "id": 378,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time\n",
+              "refId": "chronicle-query-Vlgw"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Outbound Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock, latency and backoff for attempts failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4448
+          },
+          "id": 375,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count)\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'OutboundActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-LmJb"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "(sum by (operation) (rate(task_latency_sum{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_count{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Outbound Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock, latency and backoff for attempts failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4448
+          },
+          "id": 376,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-mS0f"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Outbound Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4457
+          },
+          "id": 373,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count)\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_queue'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'OutboundActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-4pgY"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_latency_queue_sum{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_queue_count{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])) * 1000",
+              "hide": true,
+              "instant": false,
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Outbound Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4457
+          },
+          "id": 374,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_queue'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-Cdvl"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))*1000",
+              "hide": true,
+              "instant": false,
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Outbound Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4466
+          },
+          "id": 371,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count)\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_schedule'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'OutboundActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-bYVb"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_latency_schedule_sum{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_schedule_count{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate]))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Outbound Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4466
+          },
+          "id": 372,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_schedule'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-uENL"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_schedule_bucket{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Outbound Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4475
+          },
+          "id": 369,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_load'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'OutboundActive%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-hxGo"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_latency_load_sum{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_load_count{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate]))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Active Outbound Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 4475
+          },
+          "id": 370,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_load'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundActive%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-9dHD"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_load_bucket{cluster=\"$cluster\",operation=~\"OutboundActive.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Active Outbound Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 4484
+          },
+          "id": 368,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum by (operation) (rate(shardinfo_scheduled_queue_lag_sum{cluster=\"$cluster\",task_category=\"outbound\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(shardinfo_scheduled_queue_lag_count{cluster=\"$cluster\",task_category=\"outbound\",temporal_service_type=\"history\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "avg",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.7, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"outbound\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p70",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.9, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"outbound\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p90",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"outbound\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p99",
+              "range": true,
+              "refId": "G"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(1, sum by (operation, le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"outbound\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "p100",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Outbound Queue Lag distribution",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Outbound Active Tasks Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 18
+      },
+      "id": 367,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 57464
+          },
+          "id": 366,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_requests{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "requests",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 1,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "table",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'OutboundStandby%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "errors",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Passive Outbound Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 57464
+          },
+          "id": 365,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "discarded",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "throttled",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "Passive Outbound Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 57704
+          },
+          "id": 363,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'OutboundStandby%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Passive Outbound Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 57704
+          },
+          "id": 364,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Passive Outbound Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 24,
+            "x": 0,
+            "y": 57713
+          },
+          "id": 362,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"OutboundStandby.*\"}[$rate]))",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Passive Outbound Task Discarded",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and the latency and backoff of attempts that failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 57722
+          },
+          "id": 360,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundStandby%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-7MtO"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"OutboundStandby.*\",temporal_service_type=\"history\"}[$rate])))*1000",
+              "hide": true,
+              "instant": true,
+              "interval": "",
+              "key": "Q-87110a13-c6b8-4059-8107-cec714e83be8-0",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Passive Outbound Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 57722
+          },
+          "id": 361,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_queue'\n  AND cluster='$cluster'\n  AND operation LIKE 'OutboundStandby%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-ziLf"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"OutboundStandby.*\",temporal_service_type=\"history\"}[$rate])))*1000",
+              "hide": true,
+              "instant": false,
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Passive Outbound Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 57730
+          },
+          "id": 358,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'OutboundStandby%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-LQ3G"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"OutboundStandby.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"OutboundStandby.*\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Passive Outbound Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 57730
+          },
+          "id": 359,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"OutboundStandby.*\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Passive Outbound Task Processing Latency",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Outbound Passive Tasks Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 19
+      },
+      "id": 266,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.name} ${__field.labels.namespace}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5001
+          },
+          "id": 268,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_requests{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:visibility_tasks:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "requests",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'VisibilityTask%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:visibility_tasks_error:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "errors",
+              "refId": "C"
+            }
+          ],
+          "title": "Visibility Task Requests Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5001
+          },
+          "id": 270,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_discarded{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:visibility_tasks_discarded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "discarded",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(task_errors_limit_exceeded_counter{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:visibility_errors_limit_exceeded:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:visibility_tasks_workflow_busy:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "workflow_busy",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate])) by (resource_exhausted_cause)\n# use the raw query above if needed\n# temporal:v1:visibility_tasks_workflow_throttled:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "throttled - {{resource_exhausted_cause}}",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_not_active_counter{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_not_active",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_dependency_task_not_completed{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "dependency_task_not_completed",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(task_errors_namespace_handover{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "namespace_handover",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Visibility Task Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5114
+          },
+          "id": 272,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:visibility_tasks_by_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'VisibilityTask%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation\n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Visibility Task By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5114
+          },
+          "id": 274,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:visibility_errors_by_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Visibility Task Unexpected Errors By Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5123
+          },
+          "id": 276,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'VisibilityTask%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-kkEy"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_visibility_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Visibility Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5123
+          },
+          "id": 278,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'VisibilityTask%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-YXx8"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:visibility_task_latency_processing:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Visibility Task Processing Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock, latency and backoff for attempts failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5132
+          },
+          "id": 280,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'VisibilityTask%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-B901"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(task_latency_sum{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_count{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_visibility_overall_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Visibility Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock, latency and backoff for attempts failed due to user error).",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5132
+          },
+          "id": 282,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'VisibilityTask%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-e8X6"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:visibility_task_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Visibility Task In-Memory No User Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time. (i.e. task load latency + in memory latency for all attempts including user latency)",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5141
+          },
+          "id": 284,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\n#(sum by (operation) (rate(task_latency_queue_sum{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_latency_queue_count{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:avg_visibility_e2e_latency:rate1m{cluster=\"$cluster\"}*1000",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": " /* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n\t$__timeInterval(timestamp_interval) as time,\n\toperation, /* namespace, */\n\t(sum(sum_total_duration_ms)\n\t/\n\tsum(sum_bucket_events_count)) as value_ms\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND metric='task_latency_queue'\n\tAND cluster='$cluster'\n\tAND operation LIKE 'VisibilityTask%'\n\t/* AND namespace='REPLACE_ME' */\nGROUP BY \n\ttime, operation\n\t/*, namespace*/\nORDER BY \n\ttime ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Avg Visibility Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from scheduled task execution time to completion time. (i.e. task load latency + in memory latency for all attempts including user latency)",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5141
+          },
+          "id": 286,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\n#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:visibility_task_e2e_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}*1000",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n\t$__timeInterval(timestamp_interval) AS time,\n\toperation, /* namespace, */\n\tquantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND cluster='$cluster'\n\tAND metric='task_latency_queue'\n\tAND operation LIKE 'VisibilityTask%'\n\t/*\n\tAND namespace='REPLACE_ME'\n\t*/\nGROUP BY\n\ttime, operation\n\t/*, namespace*/\nORDER BY\n\ttime ASC",
+              "refId": "B"
+            }
+          ],
+          "title": "Percentile $quantile Visibility Task E2E Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5150
+          },
+          "id": 299,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_schedule'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'VisibilityTask%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-0tAW"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_latency_schedule_sum{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_schedule_count{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:visibility_task_schedule_latency:histogram_quantile_1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Visibility Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency for a task to be picked up by a worker pool goroutine.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5150
+          },
+          "id": 300,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_schedule'\n  AND cluster='$cluster'\n  AND operation LIKE 'VisibilityTask%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-Ucyw"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_schedule_bucket{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:visibility_task_schedule_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Visibility Task Schedule Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 5159
+          },
+          "id": 301,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    metric, cluster, operation, /* namespace, */\n    sum(sum_total_duration_ms) / sum(sum_bucket_events_count) as avg\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_load'\n    AND cluster='$cluster'\n    /* AND namespace='REPLACE_ME' */\n    AND operation LIKE 'VisibilityTask%'\nGROUP BY time, metric, cluster, operation /*, namespace*/\nORDER BY time",
+              "refId": "chronicle-query-xaSM"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_latency_load_sum{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_load_count{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:visibility_task_load_latency:histogram_quantile_1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Visibility Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "description": "Latency from task scheduled execution time to load time.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ms"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 5159
+          },
+          "id": 302,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_load'\n  AND cluster='$cluster'\n  AND operation LIKE 'VisibilityTask%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-Do8h"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_load_bucket{cluster=\"$cluster\",operation=~\"VisibilityTask.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:visibility_task_load_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{ operation }}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile $quantile Visibility Task Load Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 5168
+          },
+          "id": 320,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  sum(events_sum) / sum(events_count) as avg\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'VisibilityTask%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-AZb6"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(task_attempt_sum{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate])) / sum by (operation) (rate(task_attempt_count{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:visibility_task_attempt:histogram_quantile_1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Visibility Task Attempt",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.task_type}",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 5168
+          },
+          "id": 322,
+          "interval": "30s",
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  quantileInterpolatedWeighted(0.99)(bucket, events_count) AS p99\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'VisibilityTask%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+              "refId": "chronicle-query-Kicg"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#histogram_quantile(0.99, sum by (operation, le) (rate(task_attempt_bucket{cluster=\"$cluster\",operation=~\"VisibilityTask.*\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:visibility_task_attempt:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Percentile 0.99 Visibility Task Attempt",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Visibility Tasks Processing",
+      "type": "row"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 20
+      },
+      "id": 227,
+      "panels": [],
+      "title": "Tiered Storage Tasks Processing",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "requests"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "${__field.name} ${__field.labels.namespace}"
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 21
+      },
+      "id": 238,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(task_requests{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_requests:rate1m{cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "requests",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {
+            "builderOptions": {
+              "columns": [],
+              "database": "",
+              "limit": 1000,
+              "mode": "list",
+              "queryType": "table",
+              "table": ""
+            }
+          },
+          "pluginVersion": "4.5.1",
+          "queryType": "timeseries",
+          "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TieredStorage%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time \n  -- , namespace\nORDER BY time",
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum(rate(task_errors{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_errors:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "errors",
+          "refId": "C"
+        }
+      ],
+      "title": "Task Requests Vs Errors",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 21
+      },
+      "id": 239,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(task_errors_workflow_busy{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_tasks_workflow_busy:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "workflow_busy",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "sum(rate(task_errors_throttled{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate])) by (resource_exhausted_cause)\n# use the raw query above if needed\n# temporal:v1:ts_tasks_workflow_throttled:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "throttled - {{resource_exhausted_cause}}",
+          "range": true,
+          "refId": "B"
+        }
+      ],
+      "title": "Errors Breakdown",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "requests"
+            },
+            "properties": [
+              {
+                "id": "displayName",
+                "value": "${__field.labels.namespace} ${__field.labels.operation}"
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 29
+      },
+      "id": 240,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_requests{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_requests_by_op:rate1m{cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {
+            "builderOptions": {
+              "columns": [],
+              "database": "",
+              "limit": 1000,
+              "mode": "list",
+              "queryType": "table",
+              "table": ""
+            }
+          },
+          "pluginVersion": "4.5.1",
+          "queryType": "timeseries",
+          "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(sum_bucket_events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='task_latency_processing'\n    AND cluster='$cluster'\n    AND operation LIKE 'TieredStorage%'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation\n  -- , namespace\nORDER BY time",
+          "refId": "B"
+        }
+      ],
+      "title": "Task Per Operation",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 29
+      },
+      "id": 241,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_errors{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_errors_by_op:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Unexpected Error Per Operation",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 37
+      },
+      "id": 242,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (le) (rate(shardinfo_scheduled_queue_lag_bucket{cluster=\"$cluster\",task_category=\"tieredStorage\"}[$rate])))\n# use the raw query above if needed\nmax_over_time(temporal:v1:ts_queue_lag_distribution:histogram_quantile_1m{quantile=\"0.5\",cluster=\"$cluster\"}[2m])",
+          "interval": "",
+          "legendFormat": "Lag",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Task Queue Lag",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.task_type}",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 45
+      },
+      "id": 243,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  sum(events_sum) / sum(events_count) as avg\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-bz86"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_attempt_sum{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate])) / sum by (operation) (rate(task_attempt_count{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_attempt:histogram_quantile_1m{cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{quantile}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Avg Task Attempt",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.task_type} ${__field.labels.namespace}",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 45
+      },
+      "id": 244,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": ["lastNotNull", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true,
+          "sortBy": "Max",
+          "sortDesc": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {},
+          "pluginVersion": "4.11.2",
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n  metric, cluster, task_type,\n  -- namespace,\n  quantileInterpolatedWeighted(${quantile})(bucket, events_count) AS \"p$quantile\"\nFROM global_metric_task_attempt_agg_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND cluster='$cluster'\n  AND metric='task_attempt'\n  AND task_type LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, task_type, cluster\n  -- , namespace\nORDER BY time",
+          "refId": "chronicle-query-F0ZX"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_attempt_bucket{cluster=\"$cluster\",operation=~\"TieredStorage.*\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_task_attempt:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Percentile $quantile Task Attempt",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.metric} (${__field.labels.operation})",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 53
+      },
+      "id": 245,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.5)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-jKvy"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_latency_processing_sum{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_processing_count{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_latency_processing:histogram_quantile_1m{cluster=\"$cluster\", quantile=\"0.5\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Median Task Execution Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency for a single task processing attempt, including user latency (e.g. workflow lock latency).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.metric} (${__field.labels.operation})",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 53
+      },
+      "id": 246,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_processing'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-himA"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_processing_bucket{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_task_latency_processing:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Percentile $quantile Task Execution Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and the latency and backoff of attempts that failed due to user error).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.metric} (${__field.labels.operation})",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 61
+      },
+      "id": 247,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.5)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-IxO0"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_latency_sum{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_count{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:ts_task_latency:histogram_quantile_1m{cluster=\"$cluster\", quantile=\"0.5\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Median Task In-Memory No User Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Total latency across all task attempts due to system error. Excludes user latency (e.g. workflow lock latency, and the latency and backoff of attempts that failed due to user error).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.metric} (${__field.labels.operation})",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 61
+      },
+      "id": 248,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-GZ8I"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_bucket{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_task_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Percentile $quantile Task In-Memory No User Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency from scheduled task execution time to completion time (i.e. task load latency + in-memory latency for all attempts, including user latency).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 69
+      },
+      "id": 249,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_latency_queue_sum{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_queue_count{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate]))*1000\n# use the raw query above if needed\ntemporal:v1:ts_task_e2e_latency:histogram_quantile_1m{cluster=\"$cluster\", quantile=\"0.5\"}*1000",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {
+            "builderOptions": {
+              "columns": [],
+              "database": "",
+              "limit": 1000,
+              "mode": "list",
+              "queryType": "table",
+              "table": ""
+            }
+          },
+          "pluginVersion": "4.4.0",
+          "queryType": "timeseries",
+          "rawSql": "SELECT\n\t$__timeInterval(timestamp_interval) AS time,\n\toperation, /* namespace, */\n\tquantileInterpolatedWeighted(0.5)(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND cluster='$cluster'\n\tAND operation LIKE 'TieredStorage%'\n\tAND metric='task_latency_queue'\n\t/*\n\tAND namespace='REPLACE_ME'\n\t*/\nGROUP BY\n\ttime, operation\n\t/*, namespace*/\nORDER BY\n\ttime ASC",
+          "refId": "B"
+        }
+      ],
+      "title": "Median Task E2E Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency from scheduled task execution time to completion time (i.e., task load latency + in-memory latency for all attempts, including user latency).",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 69
+      },
+      "id": 250,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "# To filter by namespace, hide this Prometheus query and instead use the Clickhouse query below.\n#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_queue_bucket{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_task_e2e_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}*1000",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {
+            "builderOptions": {
+              "columns": [],
+              "database": "",
+              "limit": 1000,
+              "mode": "list",
+              "queryType": "table",
+              "table": ""
+            }
+          },
+          "pluginVersion": "4.4.0",
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n\t$__timeInterval(timestamp_interval) AS time,\n\toperation, /* namespace, */\n\tquantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) AS quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n\t$__timeFilter(timestamp_interval)\n\tAND cluster='$cluster'\n\tAND metric='task_latency_queue'\n\tAND operation LIKE 'TieredStorage%'\n\t/*\n\tAND namespace='REPLACE_ME'\n\t*/\nGROUP BY\n\ttime, operation\n\t/*, namespace*/\nORDER BY\n\ttime ASC",
+          "refId": "B"
+        }
+      ],
+      "title": "Percentile $quantile Task E2E Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency for a task to be picked up by a worker pool goroutine.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 77
+      },
+      "id": 303,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {},
+          "pluginVersion": "4.4.0",
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.5)(bucket, sum_bucket_events_count) as p50,\n  quantileInterpolatedWeighted(0.9)(bucket, sum_bucket_events_count) as p90,\n  quantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) as p99,\n  quantileInterpolatedWeighted(1)(bucket, sum_bucket_events_count) as p100\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_schedule'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-l8cY"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_latency_schedule_sum{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_schedule_count{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\nlabel_replace(\n    label_replace(temporal:v1:ts_task_schedule_latency:histogram_quantile_1m{cluster=\"$cluster\"},\n        \"quantile\", \"p$1\",\n        \"quantile\", \"^0\\\\.(..+)$\"\n    ),\n    \"quantile\", \"p${1}${2}0\",\n    \"quantile\", \"(1)?0?\\\\.(.)\"\n)",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}} {{quantile}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Task Schedule Latency (p50/p90/p99/p100)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency for a task to be picked up by a worker pool goroutine.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.metric} (${__field.labels.operation})",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "task_latency_schedule (TieredStorageActiveTaskUpload)"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": true
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 77
+      },
+      "id": 304,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "meta": {},
+          "pluginVersion": "4.4.0",
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_schedule'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-tUH2"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_schedule_bucket{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_task_schedule_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Percentile $quantile Task Schedule Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency from task scheduled execution time to load time.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 85
+      },
+      "id": 305,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted(0.5)(bucket, sum_bucket_events_count) as p50,\n  quantileInterpolatedWeighted(0.9)(bucket, sum_bucket_events_count) as p90,\n  quantileInterpolatedWeighted(0.99)(bucket, sum_bucket_events_count) as p99,\n  quantileInterpolatedWeighted(1)(bucket, sum_bucket_events_count) as p100\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_load'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-ISuQ"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(task_latency_load_sum{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(task_latency_load_count{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\nlabel_replace(\n    label_replace(\n        temporal:v1:ts_task_load_latency:histogram_quantile_1m{cluster=\"$cluster\"},\n        \"quantile\", \"p$1\",\n        \"quantile\", \"^0\\\\.(..+)$\"\n    ),\n    \"quantile\", \"p${1}${2}0\",\n    \"quantile\", \"(1)?0?\\\\.(.)\"\n)\n ",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}} {{quantile}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Task Load Latency (p50/p90/p99/p100)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "description": "Latency from task scheduled execution time to load time.",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "displayName": "${__field.labels.metric} (${__field.labels.operation})",
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 85
+      },
+      "id": 306,
+      "interval": "30s",
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 0,
+          "hide": false,
+          "queryType": "timeseries",
+          "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n  $__timeInterval(timestamp_interval) as time,\n  metric, cluster, operation,\n  -- namespace,\n  quantileInterpolatedWeighted($quantile)(bucket, sum_bucket_events_count) as quantile\nFROM global_metric_task_latency__agg__heatmap_percentile_30s\nWHERE\n  $__timeFilter(timestamp_interval)\n  AND metric='task_latency_load'\n  AND cluster='$cluster'\n  AND operation LIKE 'TieredStorage%'\n  -- AND namespace='REPLACE_ME'\nGROUP BY\n  time, metric, operation, cluster --, namespace\nORDER BY time",
+          "refId": "chronicle-query-70cU"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(task_latency_load_bucket{cluster=\"$cluster\",operation=~\"TieredStorage.*\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_task_load_latency:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Percentile $quantile Task Load Latency",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "bytes"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 93
+      },
+      "id": 251,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 1,
+          "hide": false,
+          "meta": {
+            "builderOptions": {
+              "columns": [],
+              "database": "",
+              "limit": 1000,
+              "mode": "list",
+              "queryType": "table",
+              "table": ""
+            }
+          },
+          "pluginVersion": "4.5.0",
+          "queryType": "table",
+          "rawSql": "SELECT\n    $__timeInterval(timestamp) as time,\n    -- namespace,\n    cluster,\n    quantile($quantile)(value) as \"p$quantile\"\nFROM global_metric_upload_data\nWHERE\n    $__timeFilter(timestamp)\n    AND metric='upload_payload_size'\n    AND cluster = '$cluster'\n    AND operation='TieredStorageHistoryAggregator'\n    AND service='history'\n    -- AND namespace='ns.account'\nGROUP BY time, cluster\n-- , namespace\nORDER BY time\n--limit 100\n",
+          "refId": "B"
+        }
+      ],
+      "title": "Tiered Storage Upload P$quantile Payload Size",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "datasource",
+        "uid": "-- Mixed --"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 93
+      },
+      "id": 252,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (namespace, le) (rate(pending_upload_request_bucket{cluster=\"$cluster\",operation=\"TieredStorageHistoryAggregator\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_batch_task_count_by_ns:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "hide": true,
+          "interval": "",
+          "legendFormat": "{{namespace}}",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "grafana-clickhouse-datasource",
+            "uid": "${clickhouse_datasource}"
+          },
+          "editorType": "sql",
+          "format": 1,
+          "hide": false,
+          "meta": {
+            "builderOptions": {
+              "columns": [],
+              "database": "",
+              "limit": 1000,
+              "mode": "list",
+              "queryType": "table",
+              "table": ""
+            }
+          },
+          "pluginVersion": "4.5.0",
+          "queryType": "table",
+          "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    --namespace,\n    cluster,\n    quantileInterpolatedWeighted($quantile)(bucket, events_count) AS \"p$quantile\"\nFROM global_metric_upload_data_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='pending_upload_request'\n    AND cluster = '$cluster'\n    AND service = 'history'\n    AND operation = 'TieredStorageHistoryAggregator'\n    -- AND namespace='ns.account'\nGROUP BY time, cluster --, namespace\nORDER BY time",
+          "refId": "B"
+        }
+      ],
+      "title": "Batch Task Count",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 101
+      },
+      "id": 11,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 281
+          },
+          "id": 9,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(persistence_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])) \n# use the raw query above if needed\ntemporal:v1:persistence_requests:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "requests",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    sum(events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_persistence_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster='$cluster'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time\n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            }
+          ],
+          "title": "Request Vs Errors",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 281
+          },
+          "id": 14,
+          "options": {
+            "legend": {
+              "calcs": ["lastNotNull", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Max",
+              "sortDesc": false
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (error_type) (rate(persistence_error_with_type{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:persistence_errors_by_type:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{error_type}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (error_type) (rate(saas_persistence_error_with_type{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:saas_persistence_errors_by_type:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{error_type}}",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Errors Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": [
+              {
+                "matcher": {
+                  "id": "byName",
+                  "options": "requests"
+                },
+                "properties": [
+                  {
+                    "id": "displayName",
+                    "value": "${__field.labels.namespace} ${__field.labels.operation}"
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 290
+          },
+          "id": 13,
+          "options": {
+            "legend": {
+              "calcs": ["max", "lastNotNull"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Max",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(persistence_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_core_request_per_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.1",
+              "queryType": "timeseries",
+              "rawSql": "SELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(events_count) / greatest($__interval_s, 30) AS requests\nFROM global_metric_persistence_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster='$cluster'\n    -- AND namespace='prod.infra'\nGROUP BY \n  time,\n  operation\n  -- , namespace\nORDER BY time",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(saas_persistence_requests{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:saas_persistence_core_request_per_op:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Requests Per Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 290
+          },
+          "id": 356,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (operation) (rate(persistence_error_with_type{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\n#temporal:v0:persistence_core_error_per_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(saas_persistence_errors{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:saas_persistence_core_error_per_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Errors Per Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 299
+          },
+          "id": 149,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    sum(events_sum) / sum(events_count)/1000 as avg\nFROM global_metric_persistence_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    AND service = 'history'\n    -- AND namespace='ns.account'\nGROUP BY time, operation\n-- , namespace\nORDER BY time",
+              "refId": "chronicle-query-xd33v"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#(sum by (operation) (rate(saas_persistence_latency_sum{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(saas_persistence_latency_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:saas_persistence_core_avg_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#(sum by (operation) (rate(persistence_latency_sum{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(persistence_latency_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:persistence_core_avg_latency:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "refId": "A"
+            }
+          ],
+          "title": "Avg Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 299
+          },
+          "id": 147,
+          "options": {
+            "legend": {
+              "calcs": ["max", "last"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (resource_exhausted_scope,resource_exhausted_cause,temporal_namespace) (rate(persistence_errors_resource_exhausted{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\n#temporal:v0:persistence_core_error_per_op:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{resource_exhausted_scope}}_{{resource_exhausted_cause}}_{{temporal_namespace}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (operation) (rate(saas_persistence_errors{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:saas_persistence_core_error_per_op:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Persistence Resource Exhausted",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "displayName": "${__field.labels.operation}",
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 308
+          },
+          "id": 16,
+          "options": {
+            "legend": {
+              "calcs": ["max", "lastNotNull"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Last *",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.10.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp_interval) as time,\n    -- namespace,\n    operation,\n    quantileInterpolatedWeighted(${quantile})(bucket, events_count)/1000 AS \"p$quantile\"\nFROM global_metric_persistence_latency_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    AND service = 'history'\n    -- AND operation in ('ReadRawHistoryBranch', 'ReadHistoryBranch', 'GetWorkflowExecution', 'UpdateWorkflowExecution', 'UpdateShard', 'GetTimerTasks', 'GetTransferTasks', 'GetCurrentExecution', 'GetReplicationTasks')\n    -- AND namespace='ns.account'\nGROUP BY time, operation\n-- , namespace\nORDER BY time",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(persistence_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:persistence_latency_by_op:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(saas_persistence_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:saas_persistence_latency_by_op:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Percentile $quantile Latency",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Persistence",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 102
+      },
+      "id": 254,
+      "panels": [
+        {
+          "fieldConfig": {
+            "defaults": {},
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 215
+          },
+          "id": 290,
+          "options": {
+            "code": {
+              "language": "plaintext",
+              "showLineNumbers": false,
+              "showMiniMap": false
+            },
+            "content": "<h3>Useful links</h3>\n\n<ul>\n  <li><a target=\"_blank\" href=\"https://grafana.tmprl-internal.cloud/d/PfEgf9BVk/cds-overview?orgId=1&${env:queryparam}&${cluster:queryparam}&from=${__from}&to=${__to}\">\n    CDS Overview\n  </a></li>\n    <li><a target=\"_blank\" href=\"https://grafana.tmprl-internal.cloud/d/Mf43Jsb4k/wal-boss?orgId=1&${env:queryparam}&${cluster:queryparam}&from=${__from}&to=${__to}\">\n    WAL BOSS (boss-proxy)\n  </a></li>\n  <li><a target=\"_blank\" href=\"https://grafana.tmprl-internal.cloud/d/9V7duAXVk/boss-bk-zk-stack?orgId=1&${datasource:queryparam}&${cluster:queryparam}&from=${__from}&to=${__to}\">\n    BOSS BK ZK Stack\n  </a></li>\n  <li><a target=\"_blank\" href=\"https://grafana.tmprl-internal.cloud/explore?orgId=1&left=%7B%22datasource%22:%22${logs_datasource}%22,%22queries%22:%5B%7B%22expr%22:%22%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C%3D%20%5C%22old%20WAL%5C%22%20%7C%20json%22,%22refId%22:%22A%22%7D%5D,%22range%22:%7B%22from%22:%22${__from}%22,%22to%22:%22${__to}%22%7D%7D\">\n    Loki: \"Which shards need to scrap?\"\n  </a></li>\n  <li><a target=\"_blank\" href=\"https://grafana.tmprl-internal.cloud/explore?orgId=1&left=%7B%22datasource%22:%22${logs_datasource}%22,%22queries%22:%5B%7B%22expr%22:%22sum%28rate%28%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C%3D%20%5C%22error%5C%22%20%7C%3D%20%5C%22stream-scope%5C%22%20%7C%20json%20%5B1s%5D%29%29%20by%20%28stream_scope%29%22,%22refId%22:%22A%22,%22editorMode%22:%22code%22,%22queryType%22:%22range%22%7D%5D,%22range%22:%7B%22from%22:%22${__from}%22,%22to%22:%22${__to}%22%7D%7D\">\n    Loki: \"Which WALs are erroring?\"\n  </a></li>\n  <li><a target=\"_blank\" 
href=\"https://grafana.tmprl-internal.cloud/explore?orgId=1&left=%7B%22datasource%22:%22${logs_datasource}%22,%22queries%22:%5B%7B%22expr%22:%22sum%28count_over_time%28%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C%3D%20%5C%22error%5C%22%20%7C%3D%20%5C%22stream-scope%5C%22%20%7C%20regexp%20%5C%22closing%20recvLoop:%20%28%3FP%3Cerror%3E%5B%5E%5C%5C%5C%22%5D%2A%29.%2A%5C%22%20%5B1s%5D%29%29%20by%20%28error%29%22,%22refId%22:%22A%22,%22editorMode%22:%22code%22,%22queryType%22:%22range%22,%22legendFormat%22:%22write%20%7B%7Berror%7D%7D%22%7D,%7B%22expr%22:%22sum%28count_over_time%28%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C%3D%20%5C%22error%5C%22%20%7C%3D%20%5C%22stream-scope%5C%22%20%7C%20json%20%5B1s%5D%29%29%20by%20%28error%29%22,%22refId%22:%22E%22,%22editorMode%22:%22code%22,%22queryType%22:%22range%22,%22legendFormat%22:%22write%20%7B%7Berror%7D%7D%22%7D,%7B%22refId%22:%22B%22,%22editorMode%22:%22code%22,%22expr%22:%22sum%28count_over_time%28%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C%3D%20%5C%22recovered%5C%22%5B1s%5D%29%29%22,%22queryType%22:%22range%22,%22legendFormat%22:%22recovered%22%7D,%7B%22expr%22:%22sum%28count_over_time%28%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C~%20%5C%22total%20records%20from%20segment.%2A0%20errors.%2A%5C%22%20%5B1s%5D%29%29%22,%22refId%22:%22C%22,%22editorMode%22:%22code%22,%22queryType%22:%22range%22,%22legendFormat%22:%22recovery%20read%20%28success%29%22%7D,%7B%22expr%22:%22sum%28count_over_time%28%7Bcluster%3D%5C%22${cluster}%5C%22,k8s_component%3D%5C%22history%5C%22%7D%20%7C~%20%5C%22total%20records%20from%20segment.%2A%5B1-9%5D%5B0-9%5D%2A%20errors.%2A%5C%22%20%5B1s%5D%29%29%22,%22refId%22:%22D%22,%22editorMode%22:%22code%22,%22queryType%22:%22range%22,%22legendFormat%22:%22recovery%20read%20%28failure%29%22%7D%5D,%22range%22:%7B%22from%22:%22${__from}%22,%22to%22:%22${__to}%22%7D%7D\">\n    
Loki: Write failure + recovery analyzer (&lt;5m TIME RANGES ONLY)\n  </a></li>\n</ul>",
+            "mode": "html"
+          },
+          "pluginVersion": "11.4.0",
+          "title": "Panel Title",
+          "type": "text"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Displays duration for CDS shard recovery stages.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "noValue": "(No recoveries)",
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 4
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 6,
+            "x": 12,
+            "y": 215
+          },
+          "id": 397,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_open_reader_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_open_reader_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\", walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "1. create reader",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (le) (rate(recovery_rate_limiter_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\", walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval]))) > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "2. ratelimiter",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_first_reader_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\nsum(temporal:v1:storage_recovery_latency_first_read_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\", walType=\"MUTABLE_STATE_WAL\"}) > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "3. read & replay",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_takeover_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_wal_update_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\", walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "4. shard segments update",
+              "range": true,
+              "refId": "K"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_takeover_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_takeover_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "5. takeover write",
+              "range": true,
+              "refId": "M"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_total_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_total_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "6. Total recovery period",
+              "range": true,
+              "refId": "O"
+            }
+          ],
+          "title": "Recovery Phase Latencies - MS WAL",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Displays errors for CDS shard recovery stages.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "noValue": "(No recovery errors)",
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 6,
+            "x": 18,
+            "y": 215
+          },
+          "id": 398,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_open_reader_error:rate1m{cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "1. create reader",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(temporal:v1:storage_recovery_errors_first_read_error:rate1m{cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"}) > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "2. read & replay",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_wal_update_error:rate1m{cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "3. shard segments update",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_takeover_error:rate1m{cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "4. takeover write",
+              "range": true,
+              "refId": "M"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_total_error:rate1m{cluster=\"$cluster\",walType=\"MUTABLE_STATE_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "Total Recovery Errors",
+              "range": true,
+              "refId": "O"
+            }
+          ],
+          "title": "Recovery Errors - MS WAL",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "#EAB839",
+                    "value": 30000
+                  },
+                  {
+                    "color": "red",
+                    "value": 32768
+                  },
+                  {
+                    "color": "#1F78C1",
+                    "value": 100000
+                  },
+                  {
+                    "color": "#6ED0E0",
+                    "value": 200000
+                  },
+                  {
+                    "color": "#EF843C",
+                    "value": 300000
+                  },
+                  {
+                    "color": "#E24D42",
+                    "value": 400000
+                  },
+                  {
+                    "color": "#BA43A9",
+                    "value": 500000
+                  },
+                  {
+                    "color": "#705DA0",
+                    "value": 600000
+                  },
+                  {
+                    "color": "#508642",
+                    "value": 700000
+                  },
+                  {
+                    "color": "#CCA300",
+                    "value": 800000
+                  },
+                  {
+                    "color": "#447EBC",
+                    "value": 900000
+                  },
+                  {
+                    "color": "#C15C17",
+                    "value": 1000000
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 263
+          },
+          "id": 256,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum by (operation) (rate(storage_latency_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:storage_requests_by_op:rate1m{cluster=\"$cluster\",operation!=\"get_history_branch_util\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Requests Per Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Displays duration for CDS shard recovery stages.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "noValue": "(No recoveries)",
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 4
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 12,
+            "y": 263
+          },
+          "id": 395,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_open_reader_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_open_reader_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\", walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "1. create reader",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (le) (rate(recovery_rate_limiter_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\", walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval]))) > 0 ",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "2. ratelimiter",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_first_reader_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\nsum(temporal:v1:storage_recovery_latency_first_read_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\", walType=\"HISTORY_EVENT_WAL\"}) > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "3. read & replay",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_takeover_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_wal_update_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\", walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "4. shard segments update",
+              "range": true,
+              "refId": "K"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_takeover_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_takeover_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "5. takeover write",
+              "range": true,
+              "refId": "M"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(recovery_total_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval])))\n# use the raw query above if needed\ntemporal:v1:storage_recovery_latency_total_latency:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "6. Total recovery period",
+              "range": true,
+              "refId": "O"
+            }
+          ],
+          "title": "Recovery Phase Latencies - HE WAL",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Displays errors for CDS shard recovery stages.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "noValue": "(No recovery errors)",
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 18,
+            "y": 263
+          },
+          "id": 400,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_open_reader_error:rate1m{cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "1. create reader",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum(temporal:v1:storage_recovery_errors_first_read_error:rate1m{cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"}) > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "2. read & replay",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_wal_update_error:rate1m{cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "3. shard segments update",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_takeover_error:rate1m{cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "4. takeover write",
+              "range": true,
+              "refId": "M"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "temporal:v1:storage_recovery_errors_total_error:rate1m{cluster=\"$cluster\",walType=\"HISTORY_EVENT_WAL\"} > 0",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "Total Recovery Errors",
+              "range": true,
+              "refId": "O"
+            }
+          ],
+          "title": "Recovery Errors - HE WAL",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "short"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 272
+          },
+          "id": 258,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum by (operation) (rate(storage_error{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:storage_errors:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(watermark_manager_flush_watermarks_error_count{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "watermark_manager_flush",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(flush_ms_snapshot_aborts{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "flush_ms_snapshot_aborts",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Errors Per Operation",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Segments read from during a call to ReliableReader::ReadV2",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "noValue": "(No segments read)",
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 12,
+            "y": 272
+          },
+          "id": 396,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (le) (rate(segments_per_read_bucket{cluster=\"$cluster\", namespace=\"temporal\", temporal_service_type=\"history\", walType=\"MUTABLE_STATE_WAL\", component=\"leader\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS Recovery {{fencing}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (le) (rate(segments_per_read_bucket{cluster=\"$cluster\", namespace=\"temporal\", temporal_service_type=\"history\", walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE Recovery",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (le) (rate(segments_per_read_bucket{cluster=\"$cluster\", namespace=\"temporal\", temporal_service_type=\"history\", walType=\"MUTABLE_STATE_WAL\", component=\"follower.metering\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Metering Recovery",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "Segments per Read",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "GRPCReaderV2 level latencies",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "noValue": "(No WAL reads)",
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 18,
+            "y": 272
+          },
+          "id": 401,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (le, walType) (rate(wal_reader_open_latency_bucket{cluster=\"$cluster\", namespace=\"temporal\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "open reader {{walType}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum by (le, walType) (rate(wal_reader_page_latency_bucket{cluster=\"$cluster\", namespace=\"temporal\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "per page {{walType}}",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# todo : support MS WAL as well once we can separate recovery 'vs' metering reads. Right now the polling of metering overshadows all other latencies :(\nhistogram_quantile($quantile, sum by (le, walType) (rate(wal_reader_total_latency_bucket{cluster=\"$cluster\", namespace=\"temporal\", walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Total RPC {{walType}}",
+              "range": true,
+              "refId": "C"
+            }
+          ],
+          "title": "GRPC read latencies",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 281
+          },
+          "id": 260,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#sum by (operation) (rate(storage_latency_sum{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])) / sum by (operation) (rate(storage_latency_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:history_storage_latency_by_op:histogram_quantile1m{cluster=\"$cluster\", quantile=\"0.5\"}",
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(watermark_manager_flush_watermarks_latency_sum{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval])) / sum(rate(watermark_manager_flush_watermarks_latency_count{cluster=\"$cluster\",service=\"$service\",temporal_service_type=\"history\"}[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "watermark_manager_flush (avg)",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Median Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 281
+          },
+          "id": 318,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (error_type) (rate(storage_error{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v1:storage_error_breakdown:rate1m{cluster=\"$cluster\"}",
+              "legendFormat": "{{error_type}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Error Breakdown",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 290
+          },
+          "id": 331,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.50, sum by (le) (rate(segments_per_shard_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[2m])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "p50",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.95, sum by (le) (rate(segments_per_shard_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[2m])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "p95",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(segments_per_shard_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[2m])))",
+              "instant": false,
+              "legendFormat": "p99",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1.0, sum by (le) (rate(segments_per_shard_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[2m])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "p100",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Segments / shard",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 290
+          },
+          "id": 262,
+          "options": {
+            "legend": {
+              "calcs": ["lastNotNull", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(storage_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:history_storage_latency_by_op:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{operation}}",
+              "refId": "A"
+            }
+          ],
+          "title": "Quantile $quantile Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "axisSoftMin": 0,
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 299
+          },
+          "id": 288,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "sum by (wal_scope_id) (rate(wal_latency_count{cluster=\"$cluster\",operation=\"write_stream\",temporal_service_type=\"history\"}[$rate]))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Multi WAL write rates",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": [
+              {
+                "__systemRef": "hideSeriesFrom",
+                "matcher": {
+                  "id": "byNames",
+                  "options": {
+                    "mode": "exclude",
+                    "names": ["GetHistoryTreeContainingBranch"],
+                    "prefix": "All except:",
+                    "readOnly": true
+                  }
+                },
+                "properties": [
+                  {
+                    "id": "custom.hideFrom",
+                    "value": {
+                      "legend": false,
+                      "tooltip": false,
+                      "viz": true
+                    }
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 299
+          },
+          "id": 390,
+          "options": {
+            "legend": {
+              "calcs": ["lastNotNull", "max"],
+              "displayMode": "table",
+              "placement": "bottom",
+              "showLegend": true,
+              "sortBy": "Max",
+              "sortDesc": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum by (method) (rate(gocql_resource_exhausted_count{cluster=\"$cluster\"}[$__rate_interval])) > 0",
+              "instant": false,
+              "legendFormat": "{{method}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Storage Resource Exhausted",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 308
+          },
+          "id": 410,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile($quantile, sum(rate(element_iolock_latency_bucket{cluster=\"$cluster\"}[$__rate_interval])) by (le, lock_type, lock_mode))",
+              "legendFormat": "{{lock_mode}} {{lock_type}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Element Lock Acquire Latency [Quantile $quantile]",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 308
+          },
+          "id": 411,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# histogram_quantile($quantile, sum(rate(element_iolock_latency_bucket{cluster=\"$cluster\"}[$__rate_interval])) by (le, lock_type, lock_mode))\nsum(rate(element_iolock_latency_count{cluster=\"$cluster\"}[$__rate_interval])) by (acquired, lock_type, lock_mode)",
+              "legendFormat": "{{acquired}} {{lock_type}} {{lock_mode}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Element Lock Acquired vs Failed",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 317
+          },
+          "id": 309,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "min by (home_base) (history_status_home_base{cluster=\"$cluster\",preference=\"short-lived\",temporal_service_type=\"history\"})",
+              "interval": "",
+              "legendFormat": "{{home_base}} (min)",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "max by (home_base) (history_status_home_base{cluster=\"$cluster\",preference=\"short-lived\",temporal_service_type=\"history\"})",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{home_base}} (max)",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Home Base Weights by Preference (short-lived)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 4194304
+                  },
+                  {
+                    "color": "#EAB839",
+                    "value": 8388608
+                  }
+                ]
+              },
+              "unit": "none"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 317
+          },
+          "id": 386,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1.0, sum by (le) (rate(cds_requests_inflight_bucket{cluster=\"$cluster\",container=\"history\"}[$rate])))",
+              "legendFormat": "max-cds-inflight",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1.0, sum by (le) (rate(wal_requests_inflight_bucket{cluster=\"$cluster\",container=\"history\"}[$rate])))",
+              "hide": false,
+              "legendFormat": "max-WAL-inflight",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Persistence inflight requests",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "normal"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 326
+          },
+          "id": 307,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "label_replace(sum by (preference) (rate(history_prefer_home_base{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])), \"preference\", \"### ORDINAL FALLBACK ###\", \"preference\", \"^$\")",
+              "interval": "",
+              "legendFormat": "{{preference}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Home Base Preference Rate (stacked graph)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "line"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 4194304
+                  },
+                  {
+                    "color": "#EAB839",
+                    "value": 8388608
+                  }
+                ]
+              },
+              "unit": "bytes"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 12,
+            "y": 326
+          },
+          "id": 314,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(flush_persistence_behindness_bytes_bucket{cluster=\"$cluster\",container=\"history\",walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS P99",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1, sum by (le) (rate(flush_persistence_behindness_bytes_bucket{cluster=\"$cluster\",container=\"history\",walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS P100",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(flush_persistence_behindness_bytes_bucket{cluster=\"$cluster\",container=\"history\",walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE P99",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1, sum by (le) (rate(flush_persistence_behindness_bytes_bucket{cluster=\"$cluster\",container=\"history\",walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE P100",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Watermark behindness (bytes)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": ""
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": [
+              {
+                "__systemRef": "hideSeriesFrom",
+                "matcher": {
+                  "id": "byNames",
+                  "options": {
+                    "mode": "exclude",
+                    "names": ["HE P99", "MS P99"],
+                    "prefix": "All except:",
+                    "readOnly": true
+                  }
+                },
+                "properties": [
+                  {
+                    "id": "custom.hideFrom",
+                    "value": {
+                      "legend": false,
+                      "tooltip": false,
+                      "viz": true
+                    }
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 18,
+            "y": 326
+          },
+          "id": 316,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(flush_persistence_behindness_time_bucket{cluster=\"$cluster\",container=\"history\",walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS P99",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1, sum by (le) (rate(flush_persistence_behindness_time_bucket{cluster=\"$cluster\",container=\"history\",walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS P100",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(flush_persistence_behindness_time_bucket{cluster=\"$cluster\",container=\"history\",walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE P99",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1, sum by (le) (rate(flush_persistence_behindness_time_bucket{cluster=\"$cluster\",container=\"history\",walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE P100",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Watermark behindness (time)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "normal"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 335
+          },
+          "id": 311,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "label_replace(sum by (preference) (rate(history_chosen_home_base{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])), \"preference\", \"### ORDINAL FALLBACK ###\", \"preference\", \"^$\")",
+              "interval": "",
+              "legendFormat": "{{preference}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Home Base Chosen Rate (stacked graph)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": ""
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 12,
+            "y": 335
+          },
+          "id": 387,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(flush_time_since_last_persist_bucket{cluster=\"$cluster\",temporal_service_type=\"history\",walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS P99",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1, sum by (le) (rate(flush_time_since_last_persist_bucket{cluster=\"$cluster\",temporal_service_type=\"history\",walType=\"MUTABLE_STATE_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "MS P100",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(0.99, sum by (le) (rate(flush_time_since_last_persist_bucket{cluster=\"$cluster\",temporal_service_type=\"history\",walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE P99",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "histogram_quantile(1, sum by (le) (rate(flush_time_since_last_persist_bucket{cluster=\"$cluster\",temporal_service_type=\"history\",walType=\"HISTORY_EVENT_WAL\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "HE P100",
+              "range": true,
+              "refId": "G"
+            }
+          ],
+          "title": "Time Since Last Flush (flusher v2)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 18,
+            "y": 335
+          },
+          "id": 333,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(ledger_rotation_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Ledger Rotation",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(ledger_deletion_count{cluster=\"$cluster\",temporal_service_type=\"history\"}[$__rate_interval]))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Ledger Deletion",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Ledger Ops / sec",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "CDS has 2 caching layers: the Element cache is relatively small for very recently touched workflow data, with lifetimes of a few minutes. The Execution cache is larger, holds data in a serialized form, and uses off-heap memory to reduce GC impact.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "axisSoftMax": 1,
+                "axisSoftMin": 0,
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "decimals": 1,
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "percentunit"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 344
+          },
+          "id": 342,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# sum(rate(element_memory_loads{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) / (sum(rate(element_store_loads{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) + sum(rate(element_memory_loads{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])))\ntemporal:v1:storage_element_memory_load_ratio{cluster=\"$cluster\"}",
+              "instant": false,
+              "legendFormat": "Element Cache",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(cds_execution_cache_hits{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) /\n(sum(rate(cds_execution_cache_hits{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) + sum(rate(cds_execution_cache_misses{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Execution Cache",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "CDS Caches",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "normal"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 344
+          },
+          "id": 312,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "label_replace(sum by (home_base) (rate(history_missed_home_base{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])), \"home_base\", \"#$1\", \"home_base\", \"^_(\\\\d+)$\")",
+              "interval": "",
+              "legendFormat": "{{home_base}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Home Base Not Found Rate (stacked graph)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "normal"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 353
+          },
+          "id": 308,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "label_replace(sum by (home_base) (rate(history_select_home_base{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])), \"home_base\", \"#$1\", \"home_base\", \"^_(\\\\d+)$\")",
+              "interval": "",
+              "legendFormat": "{{home_base}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Home Base Select Rate (stacked graph)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 10,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "never",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "normal"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "min": 0,
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "ops"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 12,
+            "y": 353
+          },
+          "id": 310,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "multi",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "label_replace(sum by (home_base) (rate(history_target_home_base{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])), \"home_base\", \"#$1\", \"home_base\", \"^_(\\\\d+)$\")",
+              "interval": "",
+              "legendFormat": "{{home_base}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Home Base Target Rate (stacked graph)",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 12,
+            "x": 0,
+            "y": 362
+          },
+          "id": 327,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(recent_shards_started{cluster=\"$cluster\"}[$rate]))",
+              "hide": true,
+              "legendFormat": "Recent Shard Started",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "sum(rate(shards_started_count{cluster=\"$cluster\"}[$__rate_interval]))",
+              "hide": false,
+              "legendFormat": "Shards Started",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Recent Shards Started",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Storage",
+      "type": "row"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 103
+      },
+      "id": 231,
+      "panels": [],
+      "title": "Tiered Storage Persistence",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 104
+      },
+      "id": 229,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum(rate(store_requests{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "requests",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum(rate(store_errors{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_errors:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "errors",
+          "refId": "B"
+        }
+      ],
+      "title": "Requests Vs Errors",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 104
+      },
+      "id": 232,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum(rate(store_errors_entity_not_exists{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_error_entity_not_exists:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "not_exists_error",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum(rate(store_errors_bad_request{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_error_bad_request:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "bad_request_errors",
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum(rate(store_errors_unavailable{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_error_unavailable:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "unavailable_errors",
+          "refId": "C"
+        }
+      ],
+      "title": "Errors Break Down",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 112
+      },
+      "id": 233,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(store_requests{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_request_per_op:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Requests Per Operation",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 112
+      },
+      "id": 234,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#sum by (operation) (rate(store_errors{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_error_per_op:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Errors Per Operation",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 120
+      },
+      "id": 235,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#(sum by (operation) (rate(store_latency_sum{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(store_latency_count{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:persistence_ts_avg_latency_per_op:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Avg Latency Per Operation",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 120
+      },
+      "id": 236,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "exemplar": true,
+          "expr": "#histogram_quantile($quantile, sum by (operation, le) (rate(store_latency_bucket{cluster=\"$cluster\",operation=~\".*WorkflowHistory\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:ts_persistence_latency_by_op:histogram_quantile_1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "{{operation}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Percentile $quantile Latency Per Operation",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 128
+      },
+      "id": 96,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2353
+          },
+          "id": 100,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(cache_requests{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_request:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "cache_requests",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(cache_errors{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_failures:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cache_failures",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(cache_miss{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_miss:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cache_miss",
+              "refId": "D"
+            }
+          ],
+          "title": "Mutable State Cache Operation - Non Current",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": [
+              {
+                "__systemRef": "hideSeriesFrom",
+                "matcher": {
+                  "id": "byNames",
+                  "options": {
+                    "mode": "exclude",
+                    "names": ["cache_miss"],
+                    "prefix": "All except:",
+                    "readOnly": true
+                  }
+                },
+                "properties": [
+                  {
+                    "id": "custom.hideFrom",
+                    "value": {
+                      "legend": false,
+                      "tooltip": false,
+                      "viz": true
+                    }
+                  }
+                ]
+              }
+            ]
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2353
+          },
+          "id": 202,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(cache_requests{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_request:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "cache_requests",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(cache_errors{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_failures:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "cache_failures",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#sum(rate(cache_miss{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_miss:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "cache_miss",
+              "refId": "D"
+            }
+          ],
+          "title": "Mutable State Cache Operation - Current",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2361
+          },
+          "id": 97,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.5, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.5\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "P50",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.95, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.95\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P95",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.99, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.99\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.999, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.999\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99.9",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.9999, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.9999\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99.99",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(1, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreate\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_non_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"1.0\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P100",
+              "range": true,
+              "refId": "F"
+            }
+          ],
+          "title": "Mutable State Cache P* Lock Acquire Latency - Non Current",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2361
+          },
+          "id": 201,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.5, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.5\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "P50",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.95, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.95\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P95",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.99, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.99\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.999, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.999\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99.9",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(0.9999, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"0.9999\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99.99",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile(1, sum by (le) (rate(cache_latency_bucket{cache_type=\"mutablestate\",cluster=\"$cluster\",operation=\"HistoryCacheGetOrCreateCurrent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:mutable_state_current_cache_lock_acquire_latency:histogram_quantile{quantile=\"1.0\",cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P100",
+              "range": true,
+              "refId": "F"
+            }
+          ],
+          "title": "Mutable State Cache P* Lock Acquire Latency - Current",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "s"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2369
+          },
+          "id": 352,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(1.0, sum by (operation, le) (rate(element_iolock_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))",
+              "interval": "",
+              "legendFormat": "P100",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.99, sum by (operation, le) (rate(element_iolock_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P99",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile(0.90, sum by (operation, le) (rate(element_iolock_latency_bucket{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate])))",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "P90",
+              "range": true,
+              "refId": "D"
+            }
+          ],
+          "title": "CDS Element Lock Acquire Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Pinned object count in mutable state cache.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2369
+          },
+          "id": 348,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "cache_pinned_usage{cluster=\"$cluster\", cache_type=\"mutablestate\"}",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "{{pod}}",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Mutable State Cache Pinned Objects",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Cache hit ratios for CDS caches. Generally, expected element cache ratios are >90%, execution cache ratios are ~40%.",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "axisSoftMax": 1,
+                "axisSoftMin": 0.8,
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "decimals": 1,
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  }
+                ]
+              },
+              "unit": "percentunit"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2377
+          },
+          "id": 392,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# sum(rate(element_memory_loads{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) / (sum(rate(element_store_loads{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) + sum(rate(element_memory_loads{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])))\ntemporal:v1:storage_element_memory_load_ratio{cluster=\"$cluster\"}",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Element Hit Ratio",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "1 - \n(sum(rate(cds_execution_cache_misses{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) / (sum(rate(cds_execution_cache_misses{cluster=\"$cluster\",container=\"history\"}[$__rate_interval])) + sum(rate(cds_execution_cache_hits{cluster=\"$cluster\",container=\"history\"}[$__rate_interval]))))",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "Execution Hit Ratio",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "CDS Cache",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "description": "Entry count in mutable state cache. ",
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2377
+          },
+          "id": 347,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "avg(cache_usage{cluster=\"$cluster\", cache_type=\"mutablestate\"})",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "cache_usage",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "avg(cache_pinned_usage{cluster=\"$cluster\", cache_type=\"mutablestate\"})",
+              "hide": false,
+              "instant": false,
+              "legendFormat": "cache_pinned_usage",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "Mutable State Cache Usage",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Mutable State Cache",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 129
+      },
+      "id": 107,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 24,
+            "x": 0,
+            "y": 87416
+          },
+          "id": 115,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(cache_requests{cache_type=\"events\",cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:event_cache_requests:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(cache_miss{cache_type=\"events\",cluster=\"$cluster\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:event_cache_miss:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "{{ operation }} - cache_miss",
+              "refId": "B"
+            }
+          ],
+          "title": "Events Cache - Overview",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 87424
+          },
+          "id": 105,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(cache_requests{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCacheGetEvent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:get_events_requests:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "requests",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(cache_miss{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCacheGetEvent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:get_events_miss:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cache_miss",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum by (operation) (rate(cache_errors{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCacheGetEvent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:get_events_error:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "errors",
+              "refId": "C"
+            }
+          ],
+          "title": "Get Events",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 87424
+          },
+          "id": 109,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#histogram_quantile(0.95, sum by (le) (rate(cache_latency_bucket{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCacheGetEvent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:get_events_latency:histogram_quantile{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "latency",
+              "refId": "A"
+            }
+          ],
+          "title": "Get Events Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 87432
+          },
+          "id": 110,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(cache_requests{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCachePutEvent\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:put_events_requests:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "requests",
+              "refId": "A"
+            }
+          ],
+          "title": "Put Events",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 87432
+          },
+          "id": 111,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#histogram_quantile(0.95, sum by (le) (rate(cache_latency_bucket{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCachePutEvent\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:put_events_latency:histogram_quantile{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "latency",
+              "refId": "A"
+            }
+          ],
+          "title": "Put Events Latency",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 87440
+          },
+          "id": 112,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(cache_requests{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCacheGetFromStore\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:get_events_from_store:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "requests",
+              "refId": "A"
+            }
+          ],
+          "title": "Get Events From Store",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 87440
+          },
+          "id": 113,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#histogram_quantile(0.95, sum by (le) (rate(cache_latency_bucket{cache_type=\"events\",cluster=\"$cluster\",operation=\"EventsCacheGetFromStore\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:get_events_from_store_latency:histogram_quantile{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "latency",
+              "refId": "A"
+            }
+          ],
+          "title": "Get Events From Store Latency",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Events Cache",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 130
+      },
+      "id": 119,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 24,
+            "x": 0,
+            "y": 219
+          },
+          "id": 117,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"ScheduleActivityTask\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_schedule_activity:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "schedule_activity",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"CompleteWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_complete_workflow:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "complete_workflow",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"FailWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_fail_workflow:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "fail_workflow",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"CancelWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_cancel_workflow:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cancel_workflow",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"StartTimer\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_start_timer:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "start_timer",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"RequestCancelActivityTask\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_cancel_activity:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cancel_activity",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"CancelTimer\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_cancel_timer:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cancel_timer",
+              "range": true,
+              "refId": "G"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"RecordMarker\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_record_marker:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "record_marker",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"RequestCancelExternalWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_cancel_external_workflow:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "cancel_external_workflow",
+              "range": true,
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"ContinueAsNewWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_continue_as_new:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "continue_as_new",
+              "range": true,
+              "refId": "J"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"SignalExternalWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_signal_external_workflow:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "signal_external_workflow",
+              "range": true,
+              "refId": "K"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"UpsertWorkflowSearchAttributes\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_upsert_searchable_attributes:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "upsert_searchable_attributes",
+              "range": true,
+              "refId": "L"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum(rate(command{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",commandType=\"StartChildWorkflowExecution\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_task_child_workflow:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "child_workflow",
+              "range": true,
+              "refId": "M"
+            }
+          ],
+          "title": "Workflow Task Break Down",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 227
+          },
+          "id": 128,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(service_requests{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_command_completed:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "commands_complete",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(complete_workflow_task_sticky_enabled_count{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_sticky_tasks:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "sticky_tasks",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(complete_workflow_task_sticky_disabled_count{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_non_sticky_tasks:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "non_sticky_tasks",
+              "refId": "C"
+            }
+          ],
+          "title": "Sticky Vs Non Sticky",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 227
+          },
+          "id": 121,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(failed_workflow_tasks{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_failed_task:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "failed",
+              "refId": "A"
+            }
+          ],
+          "title": "Failed Workflow Task",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 235
+          },
+          "id": 122,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(empty_completion_commands{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_empty_task:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "empty_decision",
+              "refId": "A"
+            }
+          ],
+          "title": "Empty Workflow Tasks",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 235
+          },
+          "id": 123,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "expr": "#sum(rate(multiple_completion_commands{cluster=\"$cluster\",operation=\"RespondWorkflowTaskCompleted\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_multiple_completed:rate1m{cluster=\"$cluster\"}",
+              "interval": "",
+              "legendFormat": "multiple_completed",
+              "refId": "A"
+            }
+          ],
+          "title": "Multiple Complete Workflows",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "links": [],
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 243
+          },
+          "id": 354,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "#sum by (task_category) (rate(dlq_writes{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_dlq_writes:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "{{task_category}}",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "DLQ Writes",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Workflow Task Insights",
+      "type": "row"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 131
+      },
+      "id": 127,
+      "panels": [],
+      "title": "Workflow Finish Insights",
+      "type": "row"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "barWidthFactor": 0.6,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green"
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "__systemRef": "hideSeriesFrom",
+            "matcher": {
+              "id": "byNames",
+              "options": {
+                "mode": "exclude",
+                "names": [
+                  "workflow_success",
+                  "workflow_cancel",
+                  "workflow_failed",
+                  "workflow_timeout",
+                  "workflow_terminate",
+                  "workflow_continued_as_new"
+                ],
+                "prefix": "All except:",
+                "readOnly": true
+              }
+            },
+            "properties": [
+              {
+                "id": "custom.hideFrom",
+                "value": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": true
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 24,
+        "x": 0,
+        "y": 132
+      },
+      "id": 125,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "11.4.0",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(workflow_success{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_finish_insight_success:rate1m{cluster=\"$cluster\"}",
+          "interval": "",
+          "legendFormat": "workflow_success",
+          "range": true,
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(workflow_cancel{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_finish_insight_cancel:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "workflow_cancel",
+          "range": true,
+          "refId": "B"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(workflow_failed{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_finish_insight_fail:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "workflow_failed",
+          "range": true,
+          "refId": "C"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(workflow_timeout{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_finish_insight_timeout:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "workflow_timeout",
+          "range": true,
+          "refId": "D"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(workflow_terminate{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_finish_insight_terminate:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "workflow_terminate",
+          "range": true,
+          "refId": "E"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr": "#sum(rate(workflow_continued_as_new{cluster=\"$cluster\",temporal_service_type=\"history\"}[$rate]))\n# use the raw query above if needed\ntemporal:v0:workflow_finish_insight_continue_as_new:rate1m{cluster=\"$cluster\"}",
+          "hide": false,
+          "interval": "",
+          "legendFormat": "workflow_continued_as_new",
+          "range": true,
+          "refId": "F"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "expr": "temporal:v0:workflow_finish_insight_success:rate1m{cluster=\"$cluster\"}\n+\ntemporal:v0:workflow_finish_insight_cancel:rate1m{cluster=\"$cluster\"}\n+\ntemporal:v0:workflow_finish_insight_fail:rate1m{cluster=\"$cluster\"}\n+\ntemporal:v0:workflow_finish_insight_timeout:rate1m{cluster=\"$cluster\"}\n+\ntemporal:v0:workflow_finish_insight_terminate:rate1m{cluster=\"$cluster\"}\n+\ntemporal:v0:workflow_finish_insight_continue_as_new:rate1m{cluster=\"$cluster\"}\n",
+          "hide": false,
+          "instant": false,
+          "legendFormat": "total",
+          "range": true,
+          "refId": "G"
+        }
+      ],
+      "title": "Workflow Finish Stats",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 140
+      },
+      "id": 173,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "bytes"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2853
+          },
+          "id": 198,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(mutable_state_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(mutable_state_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_mutable_state_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "mutable_state_size",
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(execution_info_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(execution_info_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_execution_info_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "execution_info_size",
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(execution_state_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(execution_state_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_execution_state_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "execution_state_size",
+              "refId": "J"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(activity_info_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(activity_info_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_activity_info_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "activity_info_size",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(timer_info_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(timer_info_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_timer_info_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "timer_info_size",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(child_info_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(child_info_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_child_info_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "child_info_size",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(request_cancel_info_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(request_cancel_info_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_request_cancel_info_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "request_cancel_info_size",
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(signal_info_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(signal_info_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_signal_info_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "signal_info_size",
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(signal_request_id_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(signal_request_id_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:workflow_insight_avg_signal_request_id_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "signal_request_id_size",
+              "range": true,
+              "refId": "K"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(buffered_events_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(buffered_events_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_buffered_events_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "buffered_events_size",
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(history_size_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(history_size_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_history_size:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "history_size",
+              "refId": "G"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.11.2",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp) AS time,\n    -- namespace,\n    avg(activity_info_size) AS activity_info_size,\n    avg(buffered_events_size) AS buffered_events_size,\n    avg(child_info_size) AS child_info_size,\n    avg(execution_info_size) AS execution_info_size,\n    avg(execution_state_size) AS execution_state_size,\n    avg(history_size) AS history_size,\n    avg(mutable_state_size) AS mutable_state_size,\n    avg(request_cancel_info_size) AS request_cancel_info_size,\n    avg(signal_info_size) AS signal_info_size,\n    avg(signal_request_id_size) AS signal_request_id_size,\n    avg(timer_info_size) AS timer_info_size,\n    avg(chasm_total_size) AS chasm_size\nFROM global_event_mutable_state_status\nWHERE\n    $__timeFilter(timestamp)\n    AND cluster = '$cluster'\n    AND operation='SessionStats'\n    -- AND namespace='ns.account'\nGROUP BY time -- , namespace\nORDER BY time",
+              "refId": "L"
+            }
+          ],
+          "title": "Avg Mutable State Size",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              },
+              "unit": "bytes"
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2853
+          },
+          "id": 178,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(mutable_state_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_mutable_state_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "mutable_state_size",
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(execution_info_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_execution_info_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "execution_info_size",
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(execution_state_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_execution_state_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "execution_state_size",
+              "refId": "J"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(activity_info_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_activity_info_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "activity_info_size",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(timer_info_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_timer_info_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "timer_info_size",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(child_info_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_child_info_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "child_info_size",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(request_cancel_info_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_request_cancel_info_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "request_cancel_info_size",
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(signal_info_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_signal_info_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "signal_info_size",
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(signal_request_id_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_signal_request_id_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "signal_request_id_size",
+              "range": true,
+              "refId": "K"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(buffered_events_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_buffered_events_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "buffered_events_size",
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(history_size_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_history_size:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "history_size",
+              "range": true,
+              "refId": "G"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.11.2",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp) AS time,\n    -- namespace,\n    quantile(${quantile})(activity_info_size) AS activity_info_size,\n    quantile(${quantile})(buffered_events_size) AS buffered_events_size,\n    quantile(${quantile})(child_info_size) AS child_info_size,\n    quantile(${quantile})(execution_info_size) AS execution_info_size,\n    quantile(${quantile})(execution_state_size) AS execution_state_size,\n    quantile(${quantile})(history_size) AS history_size,\n    quantile(${quantile})(mutable_state_size) AS mutable_state_size,\n    quantile(${quantile})(request_cancel_info_size) AS request_cancel_info_size,\n    quantile(${quantile})(signal_info_size) AS signal_info_size,\n    quantile(${quantile})(signal_request_id_size) AS signal_request_id_size,\n    quantile(${quantile})(timer_info_size) AS timer_info_size,\n    quantile(${quantile})(chasm_total_size) AS chasm_size\nFROM global_event_mutable_state_status\nWHERE\n    $__timeFilter(timestamp)\n    AND cluster = '$cluster'\n    AND operation='SessionStats'\n    -- AND namespace='ns.account'\nGROUP BY time -- , namespace\nORDER BY time\n",
+              "refId": "L"
+            }
+          ],
+          "title": "Percentile $quantile Mutable State Size",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2861
+          },
+          "id": 174,
+          "interval": "30s",
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    -- namespace,\n    sum(events_sum) / sum(events_count) as avg\nFROM global_metric_task_count_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    -- AND namespace='prod.infra'\nGROUP BY time\n-- , namespace\nORDER BY time",
+              "refId": "task_count"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    -- namespace,\n    sum(events_sum) / sum(events_count) as avg\nFROM global_metric_history_count_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND metric='history_count'\n    AND cluster = '$cluster'\n    -- AND namespace='ns.account'\nGROUP BY time\n-- , namespace\nORDER BY time",
+              "refId": "history_count"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(activity_info_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(activity_info_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_activity_info:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "activity_info_count",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(timer_info_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(timer_info_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_timer_info:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "timer_info_count",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(child_info_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(child_info_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_child_info:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "child_info_count",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(request_cancel_info_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(request_cancel_info_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_request_cancel_info:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "request_cancel_info_count",
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(signal_info_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(signal_info_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_signal_info:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "signal_info_count",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(signal_request_id_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(signal_request_id_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:workflow_insight_avg_signal_request_id_count:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "signal_request_id_count",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(buffered_events_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(buffered_events_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_buffered_events:rate1m{cluster=\"$cluster\"}",
+              "hide": false,
+              "interval": "",
+              "legendFormat": "buffered_events_count",
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(task_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(task_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:workflow_insight_avg_task_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "task_count",
+              "range": true,
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(history_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(history_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v0:workflow_insight_avg_history:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "history_count",
+              "refId": "G"
+            }
+          ],
+          "title": "Avg Mutable State Pending Entries Count",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2861
+          },
+          "id": 389,
+          "interval": "30s",
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": true,
+              "meta": {},
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    -- namespace,\n    quantileInterpolatedWeighted(${quantile})(bucket, events_count) AS \"p$quantile\"\nFROM global_metric_task_count_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    AND operation = 'SessionStats'\n    -- AND namespace='prod.infra'\nGROUP BY time\n-- , namespace\nORDER BY time",
+              "refId": "task_count"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(activity_info_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_activity_info_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "activity_info_count",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(timer_info_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_timer_info_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "timer_info_count",
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(child_info_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_child_info_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "child_info_count",
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(request_cancel_info_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_request_cancel_info_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "request_cancel_info_count",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(signal_info_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_signal_info_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "signal_info_count",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(signal_request_id_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_signal_request_id_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "signal_request_id_count",
+              "range": true,
+              "refId": "H"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(buffered_events_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_buffered_events_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "buffered_events_count",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(task_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_pending_task_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "task_count",
+              "range": true,
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "histogram_quantile($quantile, sum by (le) (rate(history_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\n#temporal:v1:mutable_state_pending_history_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "history_count",
+              "range": true,
+              "refId": "G"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp) AS time,\n    -- namespace,\n    quantile(${quantile})(activity_info_count) AS activity_info_count,\n    quantile(${quantile})(buffered_events_count) AS buffered_events_count,\n    quantile(${quantile})(child_info_count) AS child_info_count,\n    quantile(${quantile})(history_count) AS history_count,\n    quantile(${quantile})(request_cancel_info_count) AS request_cancel_info_count,\n    quantile(${quantile})(signal_info_count) AS signal_info_count,\n    quantile(${quantile})(signal_request_id_count) AS signal_request_id_count,\n    quantile(${quantile})(timer_info_count) AS timer_info_count,\n    quantile(${quantile})(task_count_outbound + task_count_replication + task_count_tiered_storage + task_count_timer + task_count_transfer + task_count_visibility) AS task_count\nFROM global_event_mutable_state_status\nWHERE\n    $__timeFilter(timestamp)\n    AND cluster = '$cluster'\n    AND operation='SessionStats'\n    -- AND namespace='ns.account'\nGROUP BY time -- , namespace\nORDER BY time\n",
+              "refId": "J"
+            }
+          ],
+          "title": "Percentile $quantile Mutable State Pending Entries Count",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 2869
+          },
+          "id": 329,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(total_activity_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(total_activity_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:avg_mutable_state_entries_total_activity_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_activity_count",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(total_user_timer_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(total_user_timer_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:avg_mutable_state_entries_total_user_timer_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_user_timer_count",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(total_child_execution_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(total_child_execution_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:avg_mutable_state_entries_total_child_execution_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_child_execution_count",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(total_request_cancel_external_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(total_request_cancel_external_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:avg_mutable_state_entries_total_request_cancel_external_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_request_cancel_external_count",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(total_signal_external_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(total_signal_external_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:avg_mutable_state_entries_total_signal_external_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_signal_external_count",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#(sum by (operation) (rate(total_signal_count_sum{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate]))) / (sum by (operation) (rate(total_signal_count_count{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:avg_mutable_state_entries_total_signal_count:rate1m{cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_signal_count",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp) AS time,\n    -- namespace,\n    avg(total_activity_count) AS total_activity_count,\n    avg(total_child_execution_count) AS total_child_execution_count,\n    avg(total_request_cancel_external_count) AS total_request_cancel_external_count,\n    avg(total_signal_count) AS total_signal_count,\n    avg(total_signal_external_count) AS total_signal_external_count,\n    avg(total_user_timer_count) AS total_user_timer_count\nFROM global_event_mutable_state_status\nWHERE\n    $__timeFilter(timestamp)\n    AND cluster = '$cluster'\n    AND operation='SessionStats'\n    -- AND namespace='ns.account'\nGROUP BY time -- , namespace\nORDER BY time\n",
+              "refId": "G"
+            }
+          ],
+          "title": "Avg Mutable State Total Entries Count",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 2869
+          },
+          "id": 330,
+          "options": {
+            "alertThreshold": true,
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(total_activity_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_entries_total_activity_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_activity_count",
+              "range": true,
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(total_user_timer_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_entries_total_user_timer_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_user_timer_count",
+              "range": true,
+              "refId": "B"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(total_child_execution_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_entries_total_child_execution_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_child_execution_count",
+              "range": true,
+              "refId": "C"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(total_request_cancel_external_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_entries_total_request_cancel_external_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_request_cancel_external_count",
+              "range": true,
+              "refId": "D"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(total_signal_external_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_entries_total_signal_external_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_signal_external_count",
+              "range": true,
+              "refId": "E"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "exemplar": true,
+              "expr": "#histogram_quantile($quantile, sum by (le) (rate(total_signal_count_bucket{cluster=\"$cluster\",operation=\"SessionStats\",temporal_service_type=\"history\"}[$rate])))\n# use the raw query above if needed\ntemporal:v1:mutable_state_entries_total_signal_count:histogram_quantile1m{quantile=\"$quantile\",cluster=\"$cluster\"}",
+              "hide": true,
+              "interval": "",
+              "legendFormat": "total_signal_count",
+              "range": true,
+              "refId": "F"
+            },
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 1,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.5.0",
+              "queryType": "table",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    $__timeInterval(timestamp) AS time,\n    -- namespace,\n    quantile(${quantile})(total_activity_count) AS total_activity_count,\n    quantile(${quantile})(total_child_execution_count) AS total_child_execution_count,\n    quantile(${quantile})(total_request_cancel_external_count) AS total_request_cancel_external_count,\n    quantile(${quantile})(total_signal_count) AS total_signal_count,\n    quantile(${quantile})(total_signal_external_count) AS total_signal_external_count,\n    quantile(${quantile})(total_user_timer_count) AS total_user_timer_count\nFROM global_event_mutable_state_status\nWHERE\n    $__timeFilter(timestamp)\n    AND cluster = '$cluster'\n    AND operation='SessionStats'\n    -- AND namespace='ns.account'\nGROUP BY time -- , namespace\nORDER BY time\n",
+              "refId": "G"
+            }
+          ],
+          "title": "Percentile $quantile Mutable State Total Entries Count",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Workflow Insights",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 141
+      },
+      "id": 402,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "af7fe237-211e-413e-9723-41a73886bcbb"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 1023
+          },
+          "id": 403,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "editorMode": "code",
+              "expr": "rate(workflow_update_registry_size_bucket{cluster=\"$cluster\"}[$__rate_interval])",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Workflow Update Registry Size",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "af7fe237-211e-413e-9723-41a73886bcbb"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 1023
+          },
+          "id": 405,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "editorMode": "code",
+              "expr": "rate(workflow_update_registry_size_count{cluster=\"$cluster\"}[$__rate_interval])",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Workflow Update Limited",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "af7fe237-211e-413e-9723-41a73886bcbb"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 1031
+          },
+          "id": 408,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "editorMode": "code",
+              "expr": "rate(workflow_update_sent_to_worker{cluster=\"$cluster\"}[$__rate_interval])",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Workflow Update Sent To Worker",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "af7fe237-211e-413e-9723-41a73886bcbb"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 1031
+          },
+          "id": 406,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "editorMode": "code",
+              "expr": "rate(workflow_update_request_rate_limited{cluster=\"$cluster\"}[$__rate_interval])",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Workflow Update Throttling",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "af7fe237-211e-413e-9723-41a73886bcbb"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 1039
+          },
+          "id": 404,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "editorMode": "code",
+              "expr": "group by (reason) (rate(workflow_update_aborted{cluster=\"$cluster\"}[$__rate_interval]))",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Workflow Update Aborted Reason",
+          "type": "timeseries"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "af7fe237-211e-413e-9723-41a73886bcbb"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisBorderShow": false,
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "barWidthFactor": 0.6,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "insertNulls": false,
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 12,
+            "y": 1039
+          },
+          "id": 407,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "pluginVersion": "11.4.0",
+          "targets": [
+            {
+              "editorMode": "code",
+              "expr": "rate(workflow_update_client_timeout{cluster=\"$cluster\"}[$__rate_interval])",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Workflow Update Timeouts",
+          "type": "timeseries"
+        }
+      ],
+      "title": "Workflow Update Insights",
+      "type": "row"
+    },
+    {
+      "collapsed": true,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 142
+      },
+      "id": 215,
+      "panels": [
+        {
+          "datasource": {
+            "type": "datasource",
+            "uid": "-- Mixed --"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "log": 2,
+                  "type": "log"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green"
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 24,
+            "x": 0,
+            "y": 105109
+          },
+          "id": 217,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "grafana-clickhouse-datasource",
+                "uid": "${clickhouse_datasource}"
+              },
+              "editorType": "sql",
+              "format": 0,
+              "hide": false,
+              "meta": {
+                "builderOptions": {
+                  "columns": [],
+                  "database": "",
+                  "limit": 1000,
+                  "mode": "list",
+                  "queryType": "table",
+                  "table": ""
+                }
+              },
+              "pluginVersion": "4.4.0",
+              "queryType": "timeseries",
+              "rawSql": "/* Uncomment the namespace parts below in order to filter on a specific namespace */\nSELECT\n    toStartOfInterval(timestamp_interval, INTERVAL $__interval_s second) as time,\n    -- namespace,\n    sum(events_sum) / sum(events_count) as avg,\n    quantileInterpolatedWeighted(0.25)(bucket, events_sum) AS p25,\n    quantileInterpolatedWeighted(0.75)(bucket, events_sum) AS p75,\n    quantileInterpolatedWeighted(0.95)(bucket, events_sum) AS p95,\n    quantileInterpolatedWeighted(0.99)(bucket, events_sum) AS p99\nFROM global_metric_state_transition_count_agg_30s\nWHERE\n    $__timeFilter(timestamp_interval)\n    AND cluster = '$cluster'\n    -- AND namespace='ns.account'\nGROUP BY time, cluster\n-- , namespace\nORDER BY time",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# sum(rate(state_transition_count_sum{cluster=\"$cluster\"}[1m]))\n# /\n# sum(rate(state_transition_count_count{cluster=\"$cluster\"}[1m]))",
+              "hide": true,
+              "legendFormat": "avg",
+              "range": true,
+              "refId": "I"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "${datasource}"
+              },
+              "editorMode": "code",
+              "expr": "# histogram_quantile(0.99, sum(rate(state_transition_count_bucket{cluster=\"$cluster\"}[1m])) by (le))",
+              "hide": true,
+              "legendFormat": "p99",
+              "range": true,
+              "refId": "B"
+            }
+          ],
+          "title": "State Transition Percentile",
+          "type": "timeseries"
+        }
+      ],
+      "title": "State Transition",
+      "type": "row"
+    }
+  ],
+  "preload": false,
+  "refresh": "",
+  "schemaVersion": 40,
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "text": "prod",
+          "value": "prod"
+        },
+        "includeAll": false,
+        "label": "env",
+        "name": "env",
+        "options": [
+          {
+            "selected": true,
+            "text": "prod",
+            "value": "prod"
+          },
+          {
+            "selected": false,
+            "text": "dev",
+            "value": "test"
+          }
+        ],
+        "query": "prod : prod, dev : test",
+        "type": "custom"
+      },
+      {
+        "current": {
+          "text": "prod thanos",
+          "value": "af7fe237-211e-413e-9723-41a73886bcbb"
+        },
+        "hide": 2,
+        "includeAll": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "regex": "${env:text} thanos",
+        "type": "datasource"
+      },
+      {
+        "current": {
+          "text": "prod-proxy-chronicle",
+          "value": "aey7czk2sodtse"
+        },
+        "hide": 2,
+        "includeAll": false,
+        "name": "clickhouse_datasource",
+        "options": [],
+        "query": "grafana-clickhouse-datasource",
+        "refresh": 1,
+        "regex": "${env}-proxy-chronicle",
+        "type": "datasource"
+      },
+      {
+        "current": {
+          "text": "prod loki",
+          "value": "e008932a-e9dc-4b7a-819f-68b662f3dc51"
+        },
+        "hide": 2,
+        "includeAll": false,
+        "name": "logs_datasource",
+        "options": [],
+        "query": "loki",
+        "refresh": 1,
+        "regex": "${env:text}.*",
+        "type": "datasource"
+      },
+      {
+        "current": {
+          "text": "s-aw003",
+          "value": "s-aw003"
+        },
+        "datasource": {
+          "type": "prometheus",
+          "uid": "${datasource}"
+        },
+        "definition": "label_values(cluster)",
+        "includeAll": false,
+        "name": "cluster",
+        "options": [],
+        "query": {
+          "qryType": 1,
+          "query": "label_values(cluster)",
+          "refId": "PrometheusVariableQueryEditor-VariableQuery"
+        },
+        "refresh": 2,
+        "regex": "s-.*",
+        "sort": 1,
+        "type": "query"
+      },
+      {
+        "current": {
+          "text": "history-headless",
+          "value": "history-headless"
+        },
+        "datasource": {
+          "type": "prometheus",
+          "uid": "${datasource}"
+        },
+        "definition": "label_values(restarts,service)",
+        "includeAll": false,
+        "name": "service",
+        "options": [],
+        "query": {
+          "query": "label_values(restarts,service)",
+          "refId": "StandardVariableQuery"
+        },
+        "refresh": 2,
+        "regex": "",
+        "type": "query"
+      },
+      {
+        "current": {
+          "text": "0.99",
+          "value": "0.99"
+        },
+        "includeAll": false,
+        "name": "quantile",
+        "options": [
+          {
+            "selected": false,
+            "text": "0.5",
+            "value": "0.5"
+          },
+          {
+            "selected": true,
+            "text": "0.99",
+            "value": "0.99"
+          },
+          {
+            "selected": false,
+            "text": "1.0",
+            "value": "1.0"
+          }
+        ],
+        "query": "0.5,0.99,1.0",
+        "type": "custom"
+      },
+      {
+        "current": {
+          "text": "$__rate_interval",
+          "value": "$__rate_interval"
+        },
+        "includeAll": false,
+        "label": "rate interval",
+        "name": "rate",
+        "options": [
+          {
+            "selected": false,
+            "text": "1m",
+            "value": "1m"
+          },
+          {
+            "selected": false,
+            "text": "5m",
+            "value": "5m"
+          },
+          {
+            "selected": false,
+            "text": "10m",
+            "value": "10m"
+          },
+          {
+            "selected": false,
+            "text": "30m",
+            "value": "30m"
+          },
+          {
+            "selected": false,
+            "text": "$__interval",
+            "value": "$__interval"
+          },
+          {
+            "selected": true,
+            "text": "$__rate_interval",
+            "value": "$__rate_interval"
+          }
+        ],
+        "query": "1m,5m,10m,30m,$__interval,$__rate_interval",
+        "type": "custom"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-30m",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": ["1m", "5m", "15m", "30m", "1h", "2h", "1d"]
+  },
+  "timezone": "utc",
+  "title": "History",
+  "uid": "jh_LXEin2",
+  "version": 202,
+  "weekStart": ""
+}

From 0f1bae754dca54d02a54665fedf98315696b4998 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Tue, 24 Mar 2026 20:03:17 -0400
Subject: [PATCH 29/40] commands

---
 commands.sh | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/commands.sh b/commands.sh
index 272d707a..5ea05a67 100644
--- a/commands.sh
+++ b/commands.sh
@@ -63,14 +63,14 @@ go run ./cmd run-scenario-with-worker \
   --scenario workflow_with_single_activity \
   --language go \
   --run-id run-1 \
-  --duration 1h --max-concurrent 500 --max-iterations-per-second 50 \
+  --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
   --worker-max-concurrent-workflow-pollers 40 \
   --worker-max-concurrent-workflow-tasks 500 \
   --worker-max-concurrent-activity-pollers 40 \
   --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
   --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
-  --namespace saa-cogs-4.temporal-dev \
+  --namespace $CELL.temporal-dev \
   --tls \
   --disable-tls-host-verification \
   --auth-header "Bearer $TEMPORAL_API_KEY"
@@ -85,7 +85,7 @@ go run ./cmd run-scenario-with-worker \
   --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
   --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
-  --namespace saa-cogs-4.temporal-dev \
+  --namespace $CELL.temporal-dev \
   --tls \
   --disable-tls-host-verification \
   --auth-header "Bearer $TEMPORAL_API_KEY"
@@ -94,3 +94,25 @@ ct ocld test dynamic-config namespace get -n saa-cogs-4.temporal-dev
 
 # 88ms RTT
 for i in $(seq 10); do curl -s -o /dev/null -w '%{time_connect}\n' https://us-west-2.aws.api.tmprl-test.cloud:7233; done
+
+# parameterized
+
+ct admintools --context $CELL -- temporal operator namespace list
+ct ocld test namespace create \
+  --namespace $CELL.temporal-dev \
+  --region us-west-2 \
+  --cloud-provider aws \
+  --retention 1 \
+  --placement-override-cell-id $CELL \
+  --auth-method api_key
+
+ct admintools --context $CELL -- temporal operator namespace list
+nslookup $CELL.temporal-dev.tmprl-test.cloud
+
+export TEMPORAL_API_KEY=xxx
+export TEMPORAL_ADDRESS=us-west-2.aws.api.tmprl-test.cloud:7233
+export TEMPORAL_NAMESPACE=$CELL.temporal-dev
+export TEMPORAL_TLS=true
+export TEMPORAL_TLS_DISABLE_HOST_VERIFICATION=true
+
+ct kubectl --context $CELL patch deployment/temporal-go-canary -n temporal -p '{"spec":{"replicas":0}}'

From 226047b1298e02a1124f8948f53902354052d811 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Tue, 24 Mar 2026 20:11:30 -0400
Subject: [PATCH 30/40] Add payloads

---
 commands.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/commands.sh b/commands.sh
index 5ea05a67..43ee5f1a 100644
--- a/commands.sh
+++ b/commands.sh
@@ -64,13 +64,14 @@ go run ./cmd run-scenario-with-worker \
   --language go \
   --run-id run-1 \
   --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
+  --option payload-size=1024 \
   --worker-max-concurrent-workflow-pollers 40 \
   --worker-max-concurrent-workflow-tasks 500 \
   --worker-max-concurrent-activity-pollers 40 \
   --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
   --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
-  --namespace $CELL.temporal-dev \
+  --namespace $NS.temporal-dev \
   --tls \
   --disable-tls-host-verification \
   --auth-header "Bearer $TEMPORAL_API_KEY"
@@ -81,11 +82,12 @@ go run ./cmd run-scenario-with-worker \
   --language go \
   --run-id run-1 \
   --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
+  --option payload-size=1024 \
   --worker-max-concurrent-activity-pollers 40 \
   --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
   --server-address us-west-2.aws.api.tmprl-test.cloud:7233 \
-  --namespace $CELL.temporal-dev \
+  --namespace $NS.temporal-dev \
   --tls \
   --disable-tls-host-verification \
   --auth-header "Bearer $TEMPORAL_API_KEY"

From 9dbfcf9c4f78d2b617cbc5a8b1d02550b0bf819d Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 10:53:59 -0400
Subject: [PATCH 31/40] Add configurable activity retries to both scenarios

New activity "payloadWithRetries" fails for N attempts then succeeds.
Both scenarios accept --option fail-for-attempts=N (default 0, no retries).
Retry backoff is 1ms with coefficient 1.0 to minimize wait time.
---
 scenarios/standalone_activity.go              | 24 ++++++++++++++-----
 scenarios/workflow_with_single_activity.go    |  2 ++
 workers/go/singleactivityworkflow/activity.go | 23 ++++++++++++++++++
 workers/go/singleactivityworkflow/workflow.go | 18 +++++++++++---
 workers/go/worker/worker.go                   |  1 +
 5 files changed, 59 insertions(+), 9 deletions(-)
 create mode 100644 workers/go/singleactivityworkflow/activity.go

diff --git a/scenarios/standalone_activity.go b/scenarios/standalone_activity.go
index d55547f0..f7dc4e05 100644
--- a/scenarios/standalone_activity.go
+++ b/scenarios/standalone_activity.go
@@ -18,12 +18,20 @@ func init() {
 			return &loadgen.GenericExecutor{
 				Execute: func(ctx context.Context, r *loadgen.Run) error {
 					payloadSize := r.ScenarioOptionInt("payload-size", 0)
+					failForAttempts := r.ScenarioOptionInt("fail-for-attempts", 0)
+					activityName := "payload"
+					var args []any
+					if failForAttempts > 0 {
+						activityName = "payloadWithRetries"
+						args = []any{make([]byte, payloadSize), int32(payloadSize), int32(failForAttempts)}
+					} else {
+						args = []any{make([]byte, payloadSize), int32(payloadSize)}
+					}
 					handle, err := r.Client.ExecuteActivity(
 						ctx,
-						activityOptions(r),
-						"payload",
-						make([]byte, payloadSize),
-						int32(payloadSize),
+						activityOptions(r, int32(failForAttempts)),
+						activityName,
+						args...,
 					)
 					if err != nil {
 						return err
@@ -35,7 +43,7 @@ func init() {
 	})
 }
 
-func activityOptions(r *loadgen.Run) client.StartActivityOptions {
+func activityOptions(r *loadgen.Run, failForAttempts int32) client.StartActivityOptions {
 	return client.StartActivityOptions{
 		ID: fmt.Sprintf(
 			"a-%s-%s-%d",
@@ -45,6 +53,10 @@ func activityOptions(r *loadgen.Run) client.StartActivityOptions {
 		),
 		TaskQueue:           r.TaskQueue(),
 		StartToCloseTimeout: 5 * time.Second,
-		RetryPolicy:         &temporal.RetryPolicy{MaximumAttempts: 1},
+		RetryPolicy: &temporal.RetryPolicy{
+			MaximumAttempts:    failForAttempts + 1,
+			InitialInterval:    1 * time.Millisecond,
+			BackoffCoefficient: 1.0,
+		},
 	}
 }
diff --git a/scenarios/workflow_with_single_activity.go b/scenarios/workflow_with_single_activity.go
index 6930a329..c1c55591 100644
--- a/scenarios/workflow_with_single_activity.go
+++ b/scenarios/workflow_with_single_activity.go
@@ -16,12 +16,14 @@ func init() {
 			return &loadgen.GenericExecutor{
 				Execute: func(ctx context.Context, r *loadgen.Run) error {
 					payloadSize := r.ScenarioOptionInt("payload-size", 0)
+					failForAttempts := r.ScenarioOptionInt("fail-for-attempts", 0)
 					handle, err := r.Client.ExecuteWorkflow(
 						ctx,
 						startWorkflowOptions(r),
 						"singleActivityWorkflow",
 						make([]byte, payloadSize),
 						int32(payloadSize),
+						int32(failForAttempts),
 					)
 					if err != nil {
 						return err
diff --git a/workers/go/singleactivityworkflow/activity.go b/workers/go/singleactivityworkflow/activity.go
new file mode 100644
index 00000000..b37035be
--- /dev/null
+++ b/workers/go/singleactivityworkflow/activity.go
@@ -0,0 +1,23 @@
+package singleactivityworkflow
+
+import (
+	"context"
+	"fmt"
+
+	"go.temporal.io/sdk/activity"
+	"go.temporal.io/sdk/temporal"
+)
+
+// PayloadWithRetries mirrors the "payload" activity, except that attempts
+// 1..failForAttempts return a retryable application error before it succeeds.
+func PayloadWithRetries(ctx context.Context, inputData []byte, bytesToReturn int32, failForAttempts int32) ([]byte, error) {
+	if attempt := activity.GetInfo(ctx).Attempt; attempt <= failForAttempts {
+		msg := fmt.Sprintf("deliberate failure (attempt %d of %d)", attempt, failForAttempts)
+		return nil, temporal.NewApplicationError(msg, "RetryableError", nil)
+	}
+	// Zero-filled result of the requested size, prefixed with as much of the
+	// input as fits.
+	result := make([]byte, bytesToReturn)
+	copy(result, inputData)
+	return result, nil
+}
diff --git a/workers/go/singleactivityworkflow/workflow.go b/workers/go/singleactivityworkflow/workflow.go
index 95c394c7..7579326d 100644
--- a/workers/go/singleactivityworkflow/workflow.go
+++ b/workers/go/singleactivityworkflow/workflow.go
@@ -7,12 +7,24 @@ import (
 	"go.temporal.io/sdk/workflow"
 )
 
-func SingleActivityWorkflow(ctx workflow.Context, input []byte, outputSize int32) ([]byte, error) {
+func SingleActivityWorkflow(ctx workflow.Context, input []byte, outputSize int32, failForAttempts int32) ([]byte, error) {
+	activityName := "payload"
+	var args []any
+	if failForAttempts > 0 {
+		activityName = "payloadWithRetries"
+		args = []any{input, outputSize, failForAttempts}
+	} else {
+		args = []any{input, outputSize}
+	}
 	var output []byte
 	err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{
 		StartToCloseTimeout: 5 * time.Second,
-		RetryPolicy:         &temporal.RetryPolicy{MaximumAttempts: 1},
-	}), "payload", input, outputSize).Get(ctx, &output)
+		RetryPolicy: &temporal.RetryPolicy{
+			MaximumAttempts:    failForAttempts + 1,
+			InitialInterval:    1 * time.Millisecond,
+			BackoffCoefficient: 1.0,
+		},
+	}), activityName, args...).Get(ctx, &output)
 	if err != nil {
 		return nil, err
 	}
diff --git a/workers/go/worker/worker.go b/workers/go/worker/worker.go
index a221c7e0..bc1cfb90 100644
--- a/workers/go/worker/worker.go
+++ b/workers/go/worker/worker.go
@@ -114,6 +114,7 @@ func runWorkers(client client.Client, taskQueues []string, options clioptions.Wo
 			w.RegisterWorkflowWithOptions(schedulerstress.NoopScheduledWorkflow, workflow.RegisterOptions{Name: "NoopScheduledWorkflow"})
 			w.RegisterWorkflowWithOptions(schedulerstress.SleepScheduledWorkflow, workflow.RegisterOptions{Name: "SleepScheduledWorkflow"})
 			w.RegisterWorkflowWithOptions(singleactivityworkflow.SingleActivityWorkflow, workflow.RegisterOptions{Name: "singleActivityWorkflow"})
+			w.RegisterActivityWithOptions(singleactivityworkflow.PayloadWithRetries, activity.RegisterOptions{Name: "payloadWithRetries"})
 			w.RegisterNexusService(service)
 			errCh <- w.Run(worker.InterruptCh())
 		}()

From e5ebae8398774903bd8115462c21cfc4b957ff4c Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 11:04:16 -0400
Subject: [PATCH 32/40] commands

---
 commands.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/commands.sh b/commands.sh
index 43ee5f1a..efb09826 100644
--- a/commands.sh
+++ b/commands.sh
@@ -22,7 +22,7 @@ ct admintools --context s-saa-cogs -- temporal operator namespace list -o json
 
 # Create namespace pinned to the cell
 ct ocld test namespace create \
-  --namespace saa-cogs-4.temporal-dev \
+  --namespace $NS.temporal-dev \
   --region us-west-2 \
   --cloud-provider aws \
   --retention 1 \
@@ -99,6 +99,9 @@ for i in $(seq 10); do curl -s -o /dev/null -w '%{time_connect}\n' https://us-we
 
 # parameterized
 
+CELL=s-saa-cogs
+NS=saa-cogs-4
+
 ct admintools --context $CELL -- temporal operator namespace list
 ct ocld test namespace create \
   --namespace $CELL.temporal-dev \

From 012a6cff5acd9599f0a3a176a46ac8d0d6d97857 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 11:04:27 -0400
Subject: [PATCH 33/40] fail-for-attempts

---
 commands.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/commands.sh b/commands.sh
index efb09826..9db37f19 100644
--- a/commands.sh
+++ b/commands.sh
@@ -62,9 +62,10 @@ ct kubectl --context s-saa-cogs patch deployment/temporal-go-canary -n temporal
 go run ./cmd run-scenario-with-worker \
   --scenario workflow_with_single_activity \
   --language go \
-  --run-id run-1 \
+  --run-id run-2 \
   --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
   --option payload-size=1024 \
+  --option fail-for-attempts=9 \
   --worker-max-concurrent-workflow-pollers 40 \
   --worker-max-concurrent-workflow-tasks 500 \
   --worker-max-concurrent-activity-pollers 40 \
@@ -80,9 +81,10 @@ go run ./cmd run-scenario-with-worker \
 go run ./cmd run-scenario-with-worker \
   --scenario standalone_activity \
   --language go \
-  --run-id run-1 \
+  --run-id run-2 \
   --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
   --option payload-size=1024 \
+  --option fail-for-attempts=9 \
   --worker-max-concurrent-activity-pollers 40 \
   --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \

From 285d07883784061ac0277ac573bef2e4bb9c6f46 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 11:23:49 -0400
Subject: [PATCH 34/40] Fix SAA Get timeout: pass context with deadline

The Go SDK's PollActivityExecution uses a 10s default gRPC timeout
when the context has no deadline. With 9 activity retries at server-
enforced ~1s backoff, the activity takes >10s total, hitting this
limit. Pass an explicit 60s timeout context to handle.Get().
---
 scenarios/standalone_activity.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scenarios/standalone_activity.go b/scenarios/standalone_activity.go
index f7dc4e05..ae301394 100644
--- a/scenarios/standalone_activity.go
+++ b/scenarios/standalone_activity.go
@@ -36,7 +36,9 @@ func init() {
 					if err != nil {
 						return err
 					}
-					return handle.Get(ctx, nil)
+					getCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
+					defer cancel()
+					return handle.Get(getCtx, nil)
 				},
 			}
 		},
@@ -52,7 +54,8 @@ func activityOptions(r *loadgen.Run, failForAttempts int32) client.StartActivity
 			r.Iteration,
 		),
 		TaskQueue:           r.TaskQueue(),
-		StartToCloseTimeout: 5 * time.Second,
+		StartToCloseTimeout:    5 * time.Second,
+		ScheduleToCloseTimeout: 60 * time.Second,
 		RetryPolicy: &temporal.RetryPolicy{
 			MaximumAttempts:    failForAttempts + 1,
 			InitialInterval:    1 * time.Millisecond,

From 08874d1627b40d9d2557015e7022e75cad0940f4 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 11:25:50 -0400
Subject: [PATCH 35/40] Upgrade go.temporal.io/sdk to v1.41.1

Fixes PollActivityExecution 10s default timeout bug for standalone
activity handle.Get() when context has no deadline.
---
 workers/go/go.mod |  6 +++---
 workers/go/go.sum | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/workers/go/go.mod b/workers/go/go.mod
index 442b263e..44564d33 100644
--- a/workers/go/go.mod
+++ b/workers/go/go.mod
@@ -5,10 +5,10 @@ go 1.25.0
 require github.com/temporalio/omes v1.0.0
 
 require (
-	github.com/nexus-rpc/sdk-go v0.5.1
+	github.com/nexus-rpc/sdk-go v0.6.0
 	github.com/spf13/cobra v1.8.0
-	go.temporal.io/api v1.62.1
-	go.temporal.io/sdk v1.40.0
+	go.temporal.io/api v1.62.2
+	go.temporal.io/sdk v1.41.1
 	go.uber.org/zap v1.27.0
 )
 
diff --git a/workers/go/go.sum b/workers/go/go.sum
index 44071be7..fa76bdd2 100644
--- a/workers/go/go.sum
+++ b/workers/go/go.sum
@@ -55,8 +55,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/nexus-rpc/sdk-go v0.5.1 h1:UFYYfoHlQc+Pn9gQpmn9QE7xluewAn2AO1OSkAh7YFU=
-github.com/nexus-rpc/sdk-go v0.5.1/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk=
+github.com/nexus-rpc/sdk-go v0.6.0 h1:QRgnP2zTbxEbiyWG/aXH8uSC5LV/Mg1fqb19jb4DBlo=
+github.com/nexus-rpc/sdk-go v0.6.0/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk=
 github.com/parquet-go/parquet-go v0.25.1 h1:l7jJwNM0xrk0cnIIptWMtnSnuxRkwq53S+Po3KG8Xgo=
 github.com/parquet-go/parquet-go v0.25.1/go.mod h1:AXBuotO1XiBtcqJb/FKFyjBG4aqa3aQAAWF3ZPzCanY=
 github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
@@ -101,10 +101,10 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
 github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
-go.temporal.io/api v1.62.1 h1:7UHMNOIqfYBVTaW0JIh/wDpw2jORkB6zUKsxGtvjSZU=
-go.temporal.io/api v1.62.1/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM=
-go.temporal.io/sdk v1.40.0 h1:n9JN3ezVpWBxLzz5xViCo0sKxp7kVVhr1Su0bcMRNNs=
-go.temporal.io/sdk v1.40.0/go.mod h1:tauxVfN174F0bdEs27+i0h8UPD7xBb6Py2SPHo7f1C0=
+go.temporal.io/api v1.62.2 h1:jFhIzlqNyJsJZTiCRQmTIMv6OTQ5BZ57z8gbgLGMaoo=
+go.temporal.io/api v1.62.2/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM=
+go.temporal.io/sdk v1.41.1 h1:yOpvsHyDD1lNuwlGBv/SUodCPhjv9nDeC9lLHW/fJUA=
+go.temporal.io/sdk v1.41.1/go.mod h1:/InXQT5guZ6AizYzpmzr5avQ/GMgq1ZObcKlKE2AhTc=
 go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=

From 27bd42d607baf08055aaafbefea23bf5e152d809 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 11:28:51 -0400
Subject: [PATCH 36/40] Upgrade root module go.temporal.io/sdk to v1.41.1

The previous commit only upgraded the worker module. The starter
(scenarios/loadgen) uses the root module, which is where handle.Get()
runs and hits the 10s default gRPC timeout bug.
---
 go.mod |  6 +++---
 go.sum | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/go.mod b/go.mod
index 7fb4566d..71d348d6 100644
--- a/go.mod
+++ b/go.mod
@@ -13,8 +13,8 @@ require (
 	github.com/spf13/pflag v1.0.5
 	github.com/stretchr/testify v1.11.1
 	github.com/temporalio/features v0.0.0-20251218214540-e36ee9b474e2
-	go.temporal.io/api v1.62.1
-	go.temporal.io/sdk v1.40.0
+	go.temporal.io/api v1.62.2
+	go.temporal.io/sdk v1.41.1
 	go.uber.org/zap v1.27.0
 	golang.org/x/mod v0.28.0
 	golang.org/x/sync v0.17.0
@@ -46,7 +46,7 @@ require (
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/nexus-rpc/sdk-go v0.5.1 // indirect
+	github.com/nexus-rpc/sdk-go v0.6.0 // indirect
 	github.com/pierrec/lz4/v4 v4.1.21 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
diff --git a/go.sum b/go.sum
index 042a9aeb..57f9f013 100644
--- a/go.sum
+++ b/go.sum
@@ -58,8 +58,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
-github.com/nexus-rpc/sdk-go v0.5.1 h1:UFYYfoHlQc+Pn9gQpmn9QE7xluewAn2AO1OSkAh7YFU=
-github.com/nexus-rpc/sdk-go v0.5.1/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk=
+github.com/nexus-rpc/sdk-go v0.6.0 h1:QRgnP2zTbxEbiyWG/aXH8uSC5LV/Mg1fqb19jb4DBlo=
+github.com/nexus-rpc/sdk-go v0.6.0/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk=
 github.com/parquet-go/parquet-go v0.25.1 h1:l7jJwNM0xrk0cnIIptWMtnSnuxRkwq53S+Po3KG8Xgo=
 github.com/parquet-go/parquet-go v0.25.1/go.mod h1:AXBuotO1XiBtcqJb/FKFyjBG4aqa3aQAAWF3ZPzCanY=
 github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
@@ -104,10 +104,10 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
 github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
-go.temporal.io/api v1.62.1 h1:7UHMNOIqfYBVTaW0JIh/wDpw2jORkB6zUKsxGtvjSZU=
-go.temporal.io/api v1.62.1/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM=
-go.temporal.io/sdk v1.40.0 h1:n9JN3ezVpWBxLzz5xViCo0sKxp7kVVhr1Su0bcMRNNs=
-go.temporal.io/sdk v1.40.0/go.mod h1:tauxVfN174F0bdEs27+i0h8UPD7xBb6Py2SPHo7f1C0=
+go.temporal.io/api v1.62.2 h1:jFhIzlqNyJsJZTiCRQmTIMv6OTQ5BZ57z8gbgLGMaoo=
+go.temporal.io/api v1.62.2/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM=
+go.temporal.io/sdk v1.41.1 h1:yOpvsHyDD1lNuwlGBv/SUodCPhjv9nDeC9lLHW/fJUA=
+go.temporal.io/sdk v1.41.1/go.mod h1:/InXQT5guZ6AizYzpmzr5avQ/GMgq1ZObcKlKE2AhTc=
 go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=

From 5d8ef4d237f4d02e3481e2490b6a8117bca5ece1 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 19:57:10 -0400
Subject: [PATCH 37/40] deploy-omes

---
 deploy-omes/README.md            | 53 +++++++++++++++++++++++
 deploy-omes/config.fish          | 11 +++++
 deploy-omes/delete-executor.fish | 20 +++++++++
 deploy-omes/executor-job.yaml    | 36 ++++++++++++++++
 deploy-omes/fetch-config.fish    | 61 ++++++++++++++++++++++++++
 deploy-omes/patch-worker.fish    | 71 +++++++++++++++++++++++++++++++
 deploy-omes/run-executor.fish    | 73 ++++++++++++++++++++++++++++++++
 7 files changed, 325 insertions(+)
 create mode 100644 deploy-omes/README.md
 create mode 100644 deploy-omes/config.fish
 create mode 100755 deploy-omes/delete-executor.fish
 create mode 100644 deploy-omes/executor-job.yaml
 create mode 100755 deploy-omes/fetch-config.fish
 create mode 100755 deploy-omes/patch-worker.fish
 create mode 100755 deploy-omes/run-executor.fish

diff --git a/deploy-omes/README.md b/deploy-omes/README.md
new file mode 100644
index 00000000..889a76a4
--- /dev/null
+++ b/deploy-omes/README.md
@@ -0,0 +1,53 @@
+# OMES Worker & Executor Scripts
+
+Manage OMES workers and scenario executors on a running test cell.
+
+## Prerequisites
+
+A test cell with OMES enabled must be scaffolded first:
+
+```
+omni scaffold environment create \
+    --cell-id=<cell> \
+    --namespace=<ns> \
+    --yaml=v5-aws-dev \
+    --temporal-version=<server-version> \
+    --agent-version=<server-version> \
+    --web-version=<web-version> \
+    --go-canary-version=<canary-version> \
+    --omes-enabled \
+    --omes-run-id=<run-id> \
+    --omes-image-tag=<image-tag>
+```
+
+This creates the cell, namespace, and initial OMES deployment (running `run-scenario-with-worker`). These scripts then split that into separate worker and executor components.
+
+## Setup
+
+Generate config from a running cluster:
+
+```
+fish fetch-config.fish <cell-id>
+```
+
+Or edit `config.fish` manually.
+
+## Usage
+
+**Patch the deployment to run workers only:**
+
+```
+fish patch-worker.fish [replicas]   # default: 2
+```
+
+**Run a scenario executor (creates a Job):**
+
+```
+fish run-executor.fish [duration]   # default: 600s
+```
+
+**Delete the executor job:**
+
+```
+fish delete-executor.fish
+```
diff --git a/deploy-omes/config.fish b/deploy-omes/config.fish
new file mode 100644
index 00000000..4e6151cc
--- /dev/null
+++ b/deploy-omes/config.fish
@@ -0,0 +1,11 @@
+# Configuration for OMES worker and executor scripts
+# Edit the values below for your environment
+
+set -x cell "s-act-alex-19"
+set -x ns "omes-sch-18"
+set -x runid "sch_load1"
+set -x scenario "scheduler_stress"
+set -x omes_image_tag "b973846-go-1.37.0"
+
+# ECR registry for OMES images
+set -x omes_ecr_registry "450777629615.dkr.ecr.us-west-2.amazonaws.com/omes"
diff --git a/deploy-omes/delete-executor.fish b/deploy-omes/delete-executor.fish
new file mode 100755
index 00000000..dbe15e26
--- /dev/null
+++ b/deploy-omes/delete-executor.fish
@@ -0,0 +1,20 @@
+#!/usr/bin/env fish
+
+# Remove the `omes-executor` Job from the `omes` namespace of the cell
+# named by $cell in config.fish. Usage: delete-executor.fish
+
+set script_dir (dirname (status --current-filename))
+
+# config.fish sits beside this script and supplies $cell.
+if not test -f "$script_dir/config.fish"
+    echo "Error: config.fish not found in $script_dir"
+    exit 1
+end
+source "$script_dir/config.fish"
+
+if test -z "$cell"
+    echo "Error: \$cell is not set in config.fish"
+    exit 1
+end
+
+omni kubectl --context $cell delete job omes-executor -n omes
diff --git a/deploy-omes/executor-job.yaml b/deploy-omes/executor-job.yaml
new file mode 100644
index 00000000..6315981a
--- /dev/null
+++ b/deploy-omes/executor-job.yaml
@@ -0,0 +1,36 @@
+apiVersion: batch/v1  # Job manifest for the OMES scenario executor
+kind: Job
+metadata:
+  name: omes-executor
+  namespace: omes
+  labels:
+    app.kubernetes.io/name: omes-executor
+    app.kubernetes.io/instance: omes
+spec:
+  backoffLimit: 0  # never retry a failed executor pod
+  ttlSecondsAfterFinished: 3600  # finished Job becomes eligible for cleanup after one hour
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: omes-executor
+        app.kubernetes.io/instance: omes
+    spec:
+      restartPolicy: Never  # a failed container is not restarted in place
+      containers:
+      - name: omes
+        image: placeholder  # NOTE(review): presumably filled in with the real image before apply - confirm
+        imagePullPolicy: IfNotPresent
+        command:
+        - /app/temporal-omes
+        args: []  # NOTE(review): presumably populated with the scenario flags before apply - confirm
+        ports:
+        - containerPort: 9090
+          name: metrics
+          protocol: TCP
+        volumeMounts:
+        - mountPath: /certs  # the "certs" secret volume below is mounted here
+          name: certs
+      volumes:
+      - name: certs
+        secret:
+          secretName: placeholder  # NOTE(review): presumably set to the cell's cert secret before apply - confirm
diff --git a/deploy-omes/fetch-config.fish b/deploy-omes/fetch-config.fish
new file mode 100755
index 00000000..28a05c89
--- /dev/null
+++ b/deploy-omes/fetch-config.fish
@@ -0,0 +1,61 @@
+#!/usr/bin/env fish
+
+# Fetches OMES config from a running cluster and, after confirmation, writes config.fish next to this script.
+# Usage: fetch-config.fish <cell-id>
+
+if test (count $argv) -lt 1
+    echo "Usage: fetch-config.fish <cell-id>"
+    exit 1
+end
+
+set -l cell $argv[1]
+set script_dir (dirname (status --current-filename))
+
+echo "Fetching deployment from $cell..."
+set -l deploy_yaml (omni kubectl --context $cell get deployment -n omes -l app.kubernetes.io/instance=omes -o yaml 2>&1)
+if test $status -ne 0
+    echo "Error: failed to get deployment"
+    printf '%s\n' $deploy_yaml
+    exit 1
+end
+# $deploy_yaml is a list of lines; "$deploy_yaml" would join them with spaces and break the YAML, so print one per line.
+set -l image (printf '%s\n' $deploy_yaml | yq '.items[0].spec.template.spec.containers[0].image')
+set -l registry (echo $image | string replace -r ':[^:]+$' '')
+set -l tag (echo $image | string replace -r '^.*:' '')
+set -l args (printf '%s\n' $deploy_yaml | yq -o=json '.items[0].spec.template.spec.containers[0].args')
+# Derive ns/scenario/runid from the container args, stripping the '.e2e' suffix and the 'cicd-go-' run-id prefix.
+set -l ns (echo $args | yq '.[] | select(test("--namespace="))' | string replace '--namespace=' '' | string replace '.e2e' '')
+set -l scenario (echo $args | yq '.[] | select(test("--scenario="))' | string replace '--scenario=' '')
+set -l runid (echo $args | yq '.[] | select(test("--run-id="))' | string replace '--run-id=cicd-go-' '')
+
+set -l config_path "$script_dir/config.fish"
+
+echo "Writing config to $config_path:"
+echo ""
+echo "  cell:              $cell"
+echo "  ns:                $ns"
+echo "  scenario:          $scenario"
+echo "  runid:             $runid"
+echo "  omes_image_tag:    $tag"
+echo "  omes_ecr_registry: $registry"
+echo ""
+echo "Do you want to proceed? (y/n)"
+read -l confirm
+
+if test "$confirm" = "y" -o "$confirm" = "Y"
+    echo "# Configuration for OMES worker and executor scripts
+# Generated by fetch-config.fish from cell $cell
+
+set -x cell \"$cell\"
+set -x ns \"$ns\"
+set -x runid \"$runid\"
+set -x scenario \"$scenario\"
+set -x omes_image_tag \"$tag\"
+
+# ECR registry for OMES images
+set -x omes_ecr_registry \"$registry\"" > $config_path
+    echo "Config written."
+else
+    echo "Cancelled."
+    exit 0
+end
diff --git a/deploy-omes/patch-worker.fish b/deploy-omes/patch-worker.fish
new file mode 100755
index 00000000..e77a91b3
--- /dev/null
+++ b/deploy-omes/patch-worker.fish
@@ -0,0 +1,71 @@
+#!/usr/bin/env fish
+
+# Patches the existing OMES deployment to run only Go workers (no scenario executor).
+# Usage: patch-worker.fish [replicas]
+# Default replicas: 2. The generated patch is printed and applied only after a y/Y confirmation.
+# Requires: config.fish next to this script, with the variables checked below defined
+
+set script_dir (dirname (status --current-filename))
+
+if test -f "$script_dir/config.fish"
+    source "$script_dir/config.fish"
+else
+    echo "Error: config.fish not found in $script_dir"
+    exit 1
+end
+
+for var in cell ns runid scenario
+    if test -z "$$var"
+        echo "Error: \$$var is not set in config.fish"
+        exit 1
+    end
+end
+
+set -l replicas 2
+if test (count $argv) -gt 0
+    set replicas $argv[1]
+end
+
+set -l deployment_name "omes-$ns-e2e-omes-worker"
+set -l namespace_fqdn "$ns.e2e"
+set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+
+set -l tmpfile (mktemp /tmp/omes-worker-patch.XXXXXX.json)
+yq -o=json -n "
+  .spec.replicas = $replicas |
+  .spec.template.spec.containers[0].name = \"omes\" |
+  .spec.template.spec.containers[0].command = [\"/app/temporal-omes\"] |
+  .spec.template.spec.containers[0].args = [
+    \"run-worker\",
+    \"--language=go\",
+    \"--dir-name=prepared\",
+    \"--scenario=$scenario\",
+    \"--run-id=cicd-go-$runid\",
+    \"--namespace=$namespace_fqdn\",
+    \"--server-address=$server_address\",
+    \"--disable-tls-host-verification\",
+    \"--tls\",
+    \"--tls-cert-path=/certs/tls.crt\",
+    \"--tls-key-path=/certs/tls.key\"
+  ]" > $tmpfile
+
+echo "Patching deployment '$deployment_name' to run $replicas worker(s):"
+echo ""
+yq -P $tmpfile
+echo ""
+echo "Do you want to proceed? (y/n)"
+read -l confirm
+
+if test "$confirm" = "y" -o "$confirm" = "Y"
+    echo ""
+    echo "Patching deployment..."
+    omni kubectl --context $cell patch deployment $deployment_name \
+        -n omes \
+        --type=strategic \
+        --patch-file=$tmpfile
+    rm -f $tmpfile
+else
+    rm -f $tmpfile
+    echo "Cancelled."
+    exit 0
+end
diff --git a/deploy-omes/run-executor.fish b/deploy-omes/run-executor.fish
new file mode 100755
index 00000000..7064aaa2
--- /dev/null
+++ b/deploy-omes/run-executor.fish
@@ -0,0 +1,73 @@
+#!/usr/bin/env fish
+
+# Runs the OMES scenario executor as a Kubernetes Job.
+# Deletes any previous job, recreates it from executor-job.yaml, and prints log/status commands.
+# Usage: run-executor.fish [duration]
+# Default duration: 600s
+
+set script_dir (dirname (status --current-filename))
+
+if test -f "$script_dir/config.fish"
+    source "$script_dir/config.fish"
+else
+    echo "Error: config.fish not found in $script_dir"
+    exit 1
+end
+
+for var in cell ns runid scenario omes_image_tag omes_ecr_registry
+    if test -z "$$var"
+        echo "Error: \$$var is not set in config.fish"
+        exit 1
+    end
+end
+
+set -l duration "600s"
+if test (count $argv) -gt 0
+    set duration $argv[1]
+end
+
+set -l job_name "omes-executor"
+set -l deployment_name "omes-$ns-e2e-omes-worker"
+set -l namespace_fqdn "$ns.e2e"
+set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+set -l image "$omes_ecr_registry:$omes_image_tag"
+set -l secret_name "$deployment_name"
+
+set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
+     .spec.template.spec.containers[0].args = [
+       \"run-scenario\",
+       \"--scenario=$scenario\",
+       \"--run-id=cicd-go-$runid\",
+       \"--namespace=$namespace_fqdn\",
+       \"--server-address=$server_address\",
+       \"--disable-tls-host-verification\",
+       \"--tls\",
+       \"--tls-cert-path=/certs/tls.crt\",
+       \"--tls-key-path=/certs/tls.key\",
+       \"--duration=$duration\",
+       \"--do-not-register-search-attributes\"
+     ] |
+     .spec.template.spec.volumes[0].secret.secretName = \"$secret_name\""
+
+# Write rendered yaml to a temp file to preserve formatting
+set -l tmpfile (mktemp /tmp/omes-executor.XXXXXX.yaml)
+yq eval "$yq_expr" "$script_dir/executor-job.yaml" > $tmpfile
+
+cat $tmpfile
+echo ""
+echo "Run executor? (y/n)"
+read -l confirm
+
+if test "$confirm" = "y" -o "$confirm" = "Y"
+    omni kubectl --context $cell delete job $job_name -n omes --wait=true 2>/dev/null
+    omni kubectl --context $cell apply -f $tmpfile -n omes
+    rm -f $tmpfile
+    echo ""
+    echo "Job started. Useful commands:"
+    echo "  omni kubectl --context $cell logs -f job/$job_name -n omes"
+    echo "  omni kubectl --context $cell get job $job_name -n omes"
+else
+    rm -f $tmpfile
+    echo "Cancelled."
+    exit 0
+end

From 183f104a477c2e8e5318d66b45f413e6b3c5d1f7 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 22:41:39 -0400
Subject: [PATCH 38/40] Modifications for saa cogs experiment

---
 deploy-omes/README.md         |  31 +--
 deploy-omes/SETUP.md          | 469 ++++++++++++++++++++++++++++++++++
 deploy-omes/config.fish       |  18 +-
 deploy-omes/patch-worker.fish |  69 +++--
 deploy-omes/run-executor.fish |  62 +++--
 5 files changed, 585 insertions(+), 64 deletions(-)
 create mode 100644 deploy-omes/SETUP.md

diff --git a/deploy-omes/README.md b/deploy-omes/README.md
index 889a76a4..1b05c2f6 100644
--- a/deploy-omes/README.md
+++ b/deploy-omes/README.md
@@ -4,23 +4,10 @@ Manage OMES workers and scenario executors on a running test cell.
 
 ## Prerequisites
 
-A test cell with OMES enabled must be scaffolded first:
+A test cell with an OMES deployment. Either:
 
-```
-omni scaffold environment create \
-    --cell-id=<cell> \
-    --namespace=<ns> \
-    --yaml=v5-aws-dev \
-    --temporal-version=<server-version> \
-    --agent-version=<server-version> \
-    --web-version=<web-version> \
-    --go-canary-version=<canary-version> \
-    --omes-enabled \
-    --omes-run-id=<run-id> \
-    --omes-image-tag=<image-tag>
-```
-
-This creates the cell, namespace, and initial OMES deployment (running `run-scenario-with-worker`). These scripts then split that into separate worker and executor components.
+1. Scaffold with `--omes-enabled` (creates cell, namespace, deployment, and mTLS certs), or
+2. Use `omni scaffold environment omes setup` to add an OMES deployment to an existing cell
 
 ## Setup
 
@@ -30,7 +17,17 @@ Generate config from a running cluster:
 fish fetch-config.fish <cell-id>
 ```
 
-Or edit `config.fish` manually.
+Or edit `config.fish` manually. Key settings:
+
+- `auth_method`: `"api_key"` or `"mtls"`
+- `api_gateway`: API gateway endpoint (for `api_key` auth)
+
+For API key auth, create the k8s secret:
+
+```
+ct kubectl --context <cell> create secret generic omes-api-key \
+    -n omes --from-literal=api-key=<your-api-key>
+```
 
 ## Usage
 
diff --git a/deploy-omes/SETUP.md b/deploy-omes/SETUP.md
new file mode 100644
index 00000000..9e94423c
--- /dev/null
+++ b/deploy-omes/SETUP.md
@@ -0,0 +1,469 @@
+# Deploying OMES to a Cloud Test Cell (API Key Auth)
+
+Step-by-step record of deploying omes workers to cell `s-saa-cogs`, targeting
+namespace `saa-cogs-4.temporal-dev` with API key authentication. March 2026.
+
+## Background
+
+The cell `s-saa-cogs` was created by the Cloud Capacity team via `omni scaffold
+environment create` (without `--omes-enabled`). The namespace
+`saa-cogs-4.temporal-dev` was created separately with `--auth-method api_key`.
+We needed to deploy omes workers running custom scenarios
+(`workflow_with_single_activity`, `standalone_activity`) from the `saa-cogs`
+branch for SAA COGS measurement experiments.
+
+## Problems encountered and solutions
+
+### 1. Scaffold omes setup failed: missing search attributes
+
+```
+ct scaffold environment omes setup \
+  --cell-id s-saa-cogs \
+  --namespace saa-cogs-4.temporal-dev \
+  --run-id run-1
+```
+
+Failed with: `namespace saa-cogs-4.temporal-dev missing the following search
+attributes CustomKeywordField,CustomStringField,KS_Int,KS_Keyword`.
+
+The search attributes existed on the cell (created via `ct admintools`) but the
+scaffold workflow validates via the **cloud control plane API**, not direct cell
+access.
+
+**Fix**: Create search attributes via the cloud API:
+
+```bash
+ct ocld test cloud-apis namespaces update \
+  -n saa-cogs-4.temporal-dev \
+  --sa "CustomKeywordField=Keyword" \
+  --sa "CustomStringField=Text" \
+  --sa "KS_Int=Int" \
+  --sa "KS_Keyword=Keyword"
+```
+
+### 2. Scaffold deployment failed: JWT authentication error
+
+Scaffold creates a deployment with mTLS cert-based auth (`--tls-cert-path`,
+`--tls-key-path`), but the namespace uses API key auth. Pods crash with:
+
+```
+FATAL  failed to dial: Jwt is missing
+```
+
+**Fix**: Patch the deployment to use API key auth instead of mTLS (see scripts
+below). This requires:
+- A k8s secret containing the API key
+- Deployment args using `--auth-header` instead of the cert path flags (`--tls` and
+  `--disable-tls-host-verification` are kept)
+- The server address changed to the API gateway endpoint
+  (`us-west-2.aws.api.tmprl-test.cloud:7233`) rather than the direct cell
+  address
+
+### 3. Custom image not in ECR
+
+The omes CI pushes images to **Docker Hub** (`temporaliotest/omes`), not ECR.
+Test cells enforce a Kyverno `restrict-image-registries` policy that only allows
+images from approved ECR registries.
+
+**Fix**: Mirror the image from Docker Hub to ECR using `skopeo` (see below).
+
+### 4. Custom scenarios not in stock image
+
+The stock omes image (`go-1.35.0`) doesn't contain the `workflow_with_single_activity`
+or `standalone_activity` scenarios added on the `saa-cogs` branch.
+
+**Fix**: Trigger the GitHub Actions CI on the branch to build a new image, then
+mirror it to ECR (see below).
+
+## Procedure
+
+### Step 1: Create the omes deployment via scaffold
+
+This creates the k8s deployment, namespace on the omes k8s namespace, and TLS
+secrets (even though we won't use mTLS).
+
+```bash
+ct scaffold environment omes setup \
+  --cell-id s-saa-cogs \
+  --namespace saa-cogs-4.temporal-dev \
+  --run-id run-1
+```
+
+Track the workflow at:
+```
+https://cloud.temporal.io/namespaces/scaffold.infra/workflows
+```
+
+### Step 2: Create the API key secret
+
+```bash
+ct kubectl --context s-saa-cogs create secret generic omes-api-key \
+  -n omes --from-literal=api-key='<your-temporal-api-key>'
+```
+
+### Step 3: Build and push a custom omes image
+
+Trigger the GitHub Actions workflow on the branch:
+
+```bash
+gh workflow run all-docker-images.yml \
+  --ref saa-cogs \
+  -f go-version=v1.41.1 \
+  -f do-push=true
+```
+
+This pushes to Docker Hub as `temporaliotest/omes:<omes-commit-sha>-go-1.41.1`.
+Check the tag:
+
+```bash
+curl -s "https://hub.docker.com/v2/repositories/temporaliotest/omes/tags/?page_size=5&ordering=-last_updated" \
+  | python3 -c "import json,sys; d=json.load(sys.stdin); [print(r['name']) for r in d.get('results',[])]"
+```
+
+### Step 4: Mirror the image from Docker Hub to ECR
+
+```bash
+ecr=450777629615.dkr.ecr.us-west-2.amazonaws.com
+tag=27bd42d-go-1.41.1  # your tag from step 3
+
+# Authenticate to ECR (requires infra-01 access)
+ct access --account infra-01 -d 8h
+
+aws ecr get-login-password --region us-west-2 --profile infra-01/AWSAdministratorAccess \
+  | skopeo login $ecr --username AWS --password-stdin
+
+# Copy from Docker Hub to ECR
+skopeo copy --all --insecure-policy \
+  docker://docker.io/temporaliotest/omes:$tag \
+  docker://$ecr/omes:$tag
+```
+
+Note: `docker login` credentials are not used by skopeo; you must use
+`skopeo login` separately.
+
+### Step 5: Configure and patch the deployment
+
+Edit `deploy-omes/config.fish`:
+
+```fish
+# Configuration for OMES worker and executor scripts
+# Edit the values below for your environment
+
+set -x cell "s-saa-cogs"
+set -x ns "saa-cogs-4"
+set -x runid "run-cell-1"
+set -x scenario "workflow_with_single_activity"
+set -x omes_image_tag "27bd42d-go-1.41.1"
+
+# ECR registry for OMES images (mirrored from Docker Hub via skopeo)
+set -x omes_ecr_registry "450777629615.dkr.ecr.us-west-2.amazonaws.com/omes"
+
+# Auth: "mtls" (uses /certs volume) or "api_key" (uses k8s secret)
+set -x auth_method "api_key"
+
+# API gateway endpoint (used with api_key auth)
+set -x api_gateway "us-west-2.aws.api.tmprl-test.cloud:7233"
+```
+
+Then patch the deployment:
+
+```bash
+cd deploy-omes
+fish patch-worker.fish
+```
+
+### Step 6: Verify
+
+```bash
+ct kubectl --context s-saa-cogs get pods -n omes
+ct kubectl --context s-saa-cogs logs -n omes -l app.kubernetes.io/instance=omes --tail=5
+```
+
+Expected output: pods Running, logs showing `Started Worker` on the correct
+task queue and namespace.
+
+## Script reference
+
+### patch-worker.fish
+
+Patches the omes deployment to run Go workers. Supports both API key and mTLS
+auth. Sets the container image, command args, and (for API key) injects the
+`TEMPORAL_API_KEY` env var from the `omes-api-key` k8s secret. Kubernetes
+expands `$(TEMPORAL_API_KEY)` in container args at pod creation time.
+
+```fish
+#!/usr/bin/env fish
+
+# Patches the existing OMES deployment to run only Go workers (no scenario executor)
+# Usage: patch-worker.fish [replicas]
+# Default replicas: 2
+# Requires: config.fish with variables defined
+
+set script_dir (dirname (status --current-filename))
+
+if test -f "$script_dir/config.fish"
+    source "$script_dir/config.fish"
+else
+    echo "Error: config.fish not found in $script_dir"
+    exit 1
+end
+
+for var in cell ns runid scenario auth_method omes_image_tag omes_ecr_registry
+    if test -z "$$var"
+        echo "Error: \$$var is not set in config.fish"
+        exit 1
+    end
+end
+
+set -l replicas 2
+if test (count $argv) -gt 0
+    set replicas $argv[1]
+end
+
+set -l deployment_name "omes-$ns-temporal-dev-omes-worker"
+set -l namespace_fqdn "$ns.temporal-dev"
+set -l image "$omes_ecr_registry:$omes_image_tag"
+
+set -l tmpfile (mktemp /tmp/omes-worker-patch.XXXXXX.json)
+
+if test "$auth_method" = "api_key"
+    set -l server_address "$api_gateway"
+    yq -o=json -n "
+      .spec.replicas = $replicas |
+      .spec.template.spec.containers[0].name = \"omes\" |
+      .spec.template.spec.containers[0].image = \"$image\" |
+      .spec.template.spec.containers[0].command = [\"/app/temporal-omes\"] |
+      .spec.template.spec.containers[0].args = [
+        \"run-worker\",
+        \"--language=go\",
+        \"--dir-name=prepared\",
+        \"--scenario=$scenario\",
+        \"--run-id=cicd-go-$runid\",
+        \"--namespace=$namespace_fqdn\",
+        \"--server-address=$server_address\",
+        \"--tls\",
+        \"--disable-tls-host-verification\",
+        \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\"
+      ] |
+      .spec.template.spec.containers[0].env = [
+        {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
+      ]" > $tmpfile
+else
+    set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+    yq -o=json -n "
+      .spec.replicas = $replicas |
+      .spec.template.spec.containers[0].name = \"omes\" |
+      .spec.template.spec.containers[0].image = \"$image\" |
+      .spec.template.spec.containers[0].command = [\"/app/temporal-omes\"] |
+      .spec.template.spec.containers[0].args = [
+        \"run-worker\",
+        \"--language=go\",
+        \"--dir-name=prepared\",
+        \"--scenario=$scenario\",
+        \"--run-id=cicd-go-$runid\",
+        \"--namespace=$namespace_fqdn\",
+        \"--server-address=$server_address\",
+        \"--disable-tls-host-verification\",
+        \"--tls\",
+        \"--tls-cert-path=/certs/tls.crt\",
+        \"--tls-key-path=/certs/tls.key\"
+      ]" > $tmpfile
+end
+
+echo "Patching deployment '$deployment_name' to run $replicas worker(s):"
+echo ""
+yq -P $tmpfile
+echo ""
+echo "Do you want to proceed? (y/n)"
+read -l confirm
+
+if test "$confirm" = "y" -o "$confirm" = "Y"
+    echo ""
+    echo "Patching deployment..."
+    omni kubectl --context $cell patch deployment $deployment_name \
+        -n omes \
+        --type=strategic \
+        --patch-file=$tmpfile
+    rm -f $tmpfile
+else
+    rm -f $tmpfile
+    echo "Cancelled."
+    exit 0
+end
+```
+
+### run-executor.fish
+
+Creates a k8s Job that runs the scenario executor (starts
+workflows/activities). The workers (deployed via `patch-worker.fish`) handle the
+actual execution.
+
+```fish
+#!/usr/bin/env fish
+
+# Runs the OMES scenario executor as a Kubernetes Job.
+# Deletes any previous job, recreates it from executor-job.yaml, and prints log/status commands.
+# Usage: run-executor.fish [duration]
+# Default duration: 600s
+
+set script_dir (dirname (status --current-filename))
+
+if test -f "$script_dir/config.fish"
+    source "$script_dir/config.fish"
+else
+    echo "Error: config.fish not found in $script_dir"
+    exit 1
+end
+
+for var in cell ns runid scenario omes_image_tag omes_ecr_registry auth_method
+    if test -z "$$var"
+        echo "Error: \$$var is not set in config.fish"
+        exit 1
+    end
+end
+
+set -l duration "600s"
+if test (count $argv) -gt 0
+    set duration $argv[1]
+end
+
+set -l job_name "omes-executor"
+set -l namespace_fqdn "$ns.temporal-dev"
+set -l image "$omes_ecr_registry:$omes_image_tag"
+
+if test "$auth_method" = "api_key"
+    set -l server_address "$api_gateway"
+    set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
+         .spec.template.spec.containers[0].args = [
+           \"run-scenario\",
+           \"--scenario=$scenario\",
+           \"--run-id=cicd-go-$runid\",
+           \"--namespace=$namespace_fqdn\",
+           \"--server-address=$server_address\",
+           \"--tls\",
+           \"--disable-tls-host-verification\",
+           \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\",
+           \"--duration=$duration\",
+           \"--do-not-register-search-attributes\"
+         ] |
+         .spec.template.spec.containers[0].env = [
+           {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
+         ] |
+         del(.spec.template.spec.volumes) |
+         del(.spec.template.spec.containers[0].volumeMounts)"
+else
+    set -l deployment_name "omes-$ns-temporal-dev-omes-worker"
+    set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+    set -l secret_name "$deployment_name"
+    set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
+         .spec.template.spec.containers[0].args = [
+           \"run-scenario\",
+           \"--scenario=$scenario\",
+           \"--run-id=cicd-go-$runid\",
+           \"--namespace=$namespace_fqdn\",
+           \"--server-address=$server_address\",
+           \"--disable-tls-host-verification\",
+           \"--tls\",
+           \"--tls-cert-path=/certs/tls.crt\",
+           \"--tls-key-path=/certs/tls.key\",
+           \"--duration=$duration\",
+           \"--do-not-register-search-attributes\"
+         ] |
+         .spec.template.spec.volumes[0].secret.secretName = \"$secret_name\""
+end
+
+# Write rendered yaml to a temp file to preserve formatting
+set -l tmpfile (mktemp /tmp/omes-executor.XXXXXX.yaml)
+yq eval "$yq_expr" "$script_dir/executor-job.yaml" > $tmpfile
+
+cat $tmpfile
+echo ""
+echo "Run executor? (y/n)"
+read -l confirm
+
+if test "$confirm" = "y" -o "$confirm" = "Y"
+    omni kubectl --context $cell delete job $job_name -n omes --wait=true 2>/dev/null
+    omni kubectl --context $cell apply -f $tmpfile -n omes
+    rm -f $tmpfile
+    echo ""
+    echo "Job started. Useful commands:"
+    echo "  omni kubectl --context $cell logs -f job/$job_name -n omes"
+    echo "  omni kubectl --context $cell get job $job_name -n omes"
+else
+    rm -f $tmpfile
+    echo "Cancelled."
+    exit 0
+end
+```
+
+### executor-job.yaml
+
+Template for the executor k8s Job. Fields are overwritten by `run-executor.fish`.
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: omes-executor
+  namespace: omes
+  labels:
+    app.kubernetes.io/name: omes-executor
+    app.kubernetes.io/instance: omes
+spec:
+  backoffLimit: 0
+  ttlSecondsAfterFinished: 3600
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: omes-executor
+        app.kubernetes.io/instance: omes
+    spec:
+      restartPolicy: Never
+      containers:
+      - name: omes
+        image: placeholder
+        imagePullPolicy: IfNotPresent
+        command:
+        - /app/temporal-omes
+        args: []
+        ports:
+        - containerPort: 9090
+          name: metrics
+          protocol: TCP
+        volumeMounts:
+        - mountPath: /certs
+          name: certs
+      volumes:
+      - name: certs
+        secret:
+          secretName: placeholder
+```
+
+## Key details
+
+- **Image registry**: Test cells enforce a Kyverno `restrict-image-registries`
+  policy. Only `450777629615.dkr.ecr.us-west-2.amazonaws.com` and
+  `612212029444.dkr.ecr.us-west-2.amazonaws.com` are allowed. Docker Hub images
+  must be mirrored to ECR via `skopeo`.
+
+- **ECR access**: Requires `ct access --account infra-01` then
+  `skopeo login` (not `docker login`) with the ECR credentials.
+
+- **API key in k8s**: Stored as a k8s secret `omes-api-key` in the `omes`
+  namespace. Injected into pods via `secretKeyRef`. Kubernetes expands
+  `$(TEMPORAL_API_KEY)` in container args.
+
+- **Server address**: API key auth uses the API gateway
+  (`us-west-2.aws.api.tmprl-test.cloud:7233`), not the direct cell address
+  (`<ns>.temporal-dev.tmprl-test.cloud:7233`), which is used with mTLS.
+
+- **Deployment naming**: Scaffold creates the deployment as
+  `omes-<ns>-temporal-dev-omes-worker` (e.g.
+  `omes-saa-cogs-4-temporal-dev-omes-worker`).
+
+- **Task queue**: The worker polls `omes-cicd-go-<runid>`: a hardcoded `omes-`
+  prefix, then the `--run-id` value (`cicd-go-<runid>` in these scripts).
+
+- **Scaffold workflow tracking**: Scaffold workflows run in the `scaffold.infra`
+  namespace, visible at
+  `https://cloud.temporal.io/namespaces/scaffold.infra/workflows`.
diff --git a/deploy-omes/config.fish b/deploy-omes/config.fish
index 4e6151cc..a4126d6f 100644
--- a/deploy-omes/config.fish
+++ b/deploy-omes/config.fish
@@ -1,11 +1,17 @@
 # Configuration for OMES worker and executor scripts
 # Edit the values below for your environment
 
-set -x cell "s-act-alex-19"
-set -x ns "omes-sch-18"
-set -x runid "sch_load1"
-set -x scenario "scheduler_stress"
-set -x omes_image_tag "b973846-go-1.37.0"
+set -x cell "s-saa-cogs"
+set -x ns "saa-cogs-4"
+set -x runid "run-cell-1"
+set -x scenario "workflow_with_single_activity"
+set -x omes_image_tag "27bd42d-go-1.41.1"
 
-# ECR registry for OMES images
+# ECR registry for OMES images (mirrored from Docker Hub via skopeo)
 set -x omes_ecr_registry "450777629615.dkr.ecr.us-west-2.amazonaws.com/omes"
+
+# Auth: "mtls" (uses /certs volume) or "api_key" (uses k8s secret)
+set -x auth_method "api_key"
+
+# API gateway endpoint (used with api_key auth)
+set -x api_gateway "us-west-2.aws.api.tmprl-test.cloud:7233"
diff --git a/deploy-omes/patch-worker.fish b/deploy-omes/patch-worker.fish
index e77a91b3..da588bdf 100755
--- a/deploy-omes/patch-worker.fish
+++ b/deploy-omes/patch-worker.fish
@@ -14,7 +14,7 @@ else
     exit 1
 end
 
-for var in cell ns runid scenario
+for var in cell ns runid scenario auth_method omes_image_tag omes_ecr_registry
     if test -z "$$var"
         echo "Error: \$$var is not set in config.fish"
         exit 1
@@ -26,28 +26,55 @@ if test (count $argv) -gt 0
     set replicas $argv[1]
 end
 
-set -l deployment_name "omes-$ns-e2e-omes-worker"
-set -l namespace_fqdn "$ns.e2e"
-set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+set -l deployment_name "omes-$ns-temporal-dev-omes-worker"
+set -l namespace_fqdn "$ns.temporal-dev"
+set -l image "$omes_ecr_registry:$omes_image_tag"
 
 set -l tmpfile (mktemp /tmp/omes-worker-patch.XXXXXX.json)
-yq -o=json -n "
-  .spec.replicas = $replicas |
-  .spec.template.spec.containers[0].name = \"omes\" |
-  .spec.template.spec.containers[0].command = [\"/app/temporal-omes\"] |
-  .spec.template.spec.containers[0].args = [
-    \"run-worker\",
-    \"--language=go\",
-    \"--dir-name=prepared\",
-    \"--scenario=$scenario\",
-    \"--run-id=cicd-go-$runid\",
-    \"--namespace=$namespace_fqdn\",
-    \"--server-address=$server_address\",
-    \"--disable-tls-host-verification\",
-    \"--tls\",
-    \"--tls-cert-path=/certs/tls.crt\",
-    \"--tls-key-path=/certs/tls.key\"
-  ]" > $tmpfile
+
+if test "$auth_method" = "api_key"
+    set -l server_address "$api_gateway"
+    yq -o=json -n "
+      .spec.replicas = $replicas |
+      .spec.template.spec.containers[0].name = \"omes\" |
+      .spec.template.spec.containers[0].image = \"$image\" |
+      .spec.template.spec.containers[0].command = [\"/app/temporal-omes\"] |
+      .spec.template.spec.containers[0].args = [
+        \"run-worker\",
+        \"--language=go\",
+        \"--dir-name=prepared\",
+        \"--scenario=$scenario\",
+        \"--run-id=cicd-go-$runid\",
+        \"--namespace=$namespace_fqdn\",
+        \"--server-address=$server_address\",
+        \"--tls\",
+        \"--disable-tls-host-verification\",
+        \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\"
+      ] |
+      .spec.template.spec.containers[0].env = [
+        {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
+      ]" > $tmpfile
+else
+    set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+    yq -o=json -n "
+      .spec.replicas = $replicas |
+      .spec.template.spec.containers[0].name = \"omes\" |
+      .spec.template.spec.containers[0].image = \"$image\" |
+      .spec.template.spec.containers[0].command = [\"/app/temporal-omes\"] |
+      .spec.template.spec.containers[0].args = [
+        \"run-worker\",
+        \"--language=go\",
+        \"--dir-name=prepared\",
+        \"--scenario=$scenario\",
+        \"--run-id=cicd-go-$runid\",
+        \"--namespace=$namespace_fqdn\",
+        \"--server-address=$server_address\",
+        \"--disable-tls-host-verification\",
+        \"--tls\",
+        \"--tls-cert-path=/certs/tls.crt\",
+        \"--tls-key-path=/certs/tls.key\"
+      ]" > $tmpfile
+end
 
 echo "Patching deployment '$deployment_name' to run $replicas worker(s):"
 echo ""
diff --git a/deploy-omes/run-executor.fish b/deploy-omes/run-executor.fish
index 7064aaa2..54c79963 100755
--- a/deploy-omes/run-executor.fish
+++ b/deploy-omes/run-executor.fish
@@ -14,7 +14,7 @@ else
     exit 1
 end
 
-for var in cell ns runid scenario omes_image_tag omes_ecr_registry
+for var in cell ns runid scenario omes_image_tag omes_ecr_registry auth_method
     if test -z "$$var"
         echo "Error: \$$var is not set in config.fish"
         exit 1
@@ -27,27 +27,49 @@ if test (count $argv) -gt 0
 end
 
 set -l job_name "omes-executor"
-set -l deployment_name "omes-$ns-e2e-omes-worker"
-set -l namespace_fqdn "$ns.e2e"
-set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+set -l namespace_fqdn "$ns.temporal-dev"
 set -l image "$omes_ecr_registry:$omes_image_tag"
-set -l secret_name "$deployment_name"
 
-set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
-     .spec.template.spec.containers[0].args = [
-       \"run-scenario\",
-       \"--scenario=$scenario\",
-       \"--run-id=cicd-go-$runid\",
-       \"--namespace=$namespace_fqdn\",
-       \"--server-address=$server_address\",
-       \"--disable-tls-host-verification\",
-       \"--tls\",
-       \"--tls-cert-path=/certs/tls.crt\",
-       \"--tls-key-path=/certs/tls.key\",
-       \"--duration=$duration\",
-       \"--do-not-register-search-attributes\"
-     ] |
-     .spec.template.spec.volumes[0].secret.secretName = \"$secret_name\""
+if test "$auth_method" = "api_key"
+    set -l server_address "$api_gateway"
+    set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
+         .spec.template.spec.containers[0].args = [
+           \"run-scenario\",
+           \"--scenario=$scenario\",
+           \"--run-id=cicd-go-$runid\",
+           \"--namespace=$namespace_fqdn\",
+           \"--server-address=$server_address\",
+           \"--tls\",
+           \"--disable-tls-host-verification\",
+           \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\",
+           \"--duration=$duration\",
+           \"--do-not-register-search-attributes\"
+         ] |
+         .spec.template.spec.containers[0].env = [
+           {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
+         ] |
+         del(.spec.template.spec.volumes) |
+         del(.spec.template.spec.containers[0].volumeMounts)"
+else
+    set -l deployment_name "omes-$ns-temporal-dev-omes-worker"
+    set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
+    set -l secret_name "$deployment_name"
+    set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
+         .spec.template.spec.containers[0].args = [
+           \"run-scenario\",
+           \"--scenario=$scenario\",
+           \"--run-id=cicd-go-$runid\",
+           \"--namespace=$namespace_fqdn\",
+           \"--server-address=$server_address\",
+           \"--disable-tls-host-verification\",
+           \"--tls\",
+           \"--tls-cert-path=/certs/tls.crt\",
+           \"--tls-key-path=/certs/tls.key\",
+           \"--duration=$duration\",
+           \"--do-not-register-search-attributes\"
+         ] |
+         .spec.template.spec.volumes[0].secret.secretName = \"$secret_name\""
+end
 
 # Write rendered yaml to a temp file to preserve formatting
 set -l tmpfile (mktemp /tmp/omes-executor.XXXXXX.yaml)

From a1091391668674497676049bade85c1b20c9eddf Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 23:11:46 -0400
Subject: [PATCH 39/40] Increase StartToCloseTimeout to 30s, lower rate to
 500/s

- Standalone activity StartToCloseTimeout 5s -> 30s (tight timeout
  caused failures at high throughput; irrelevant for COGS experiment)
- Default executor rate 1000/s -> 500/s to start conservatively
- Fix fish variable scoping bug in run-executor.fish (yq_expr was
  local to if block, causing placeholder image)
- Add worker tuning flags and image to patch-worker.fish
- Support API key auth in deploy-omes scripts
---
 commands.sh                      |  5 +-
 deploy-omes/config.fish          | 12 ++++-
 deploy-omes/patch-worker.fish    |  4 +-
 deploy-omes/run-executor.fish    | 92 ++++++++++++++++----------------
 scenarios/standalone_activity.go |  2 +-
 5 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/commands.sh b/commands.sh
index 9db37f19..b99fd96f 100644
--- a/commands.sh
+++ b/commands.sh
@@ -82,9 +82,8 @@ go run ./cmd run-scenario-with-worker \
   --scenario standalone_activity \
   --language go \
   --run-id run-2 \
-  --duration 1h --max-concurrent 500 --max-iterations-per-second 100 \
-  --option payload-size=1024 \
-  --option fail-for-attempts=9 \
+  --duration 1h --max-concurrent 500 --max-iterations-per-second 1000 \
+  --option payload-size=102400 \
   --worker-max-concurrent-activity-pollers 40 \
   --worker-max-concurrent-activities 500 \
   --do-not-register-search-attributes \
diff --git a/deploy-omes/config.fish b/deploy-omes/config.fish
index a4126d6f..3a634ad2 100644
--- a/deploy-omes/config.fish
+++ b/deploy-omes/config.fish
@@ -4,7 +4,7 @@
 set -x cell "s-saa-cogs"
 set -x ns "saa-cogs-4"
 set -x runid "run-cell-1"
-set -x scenario "workflow_with_single_activity"
+set -x scenario "standalone_activity"
 set -x omes_image_tag "27bd42d-go-1.41.1"
 
 # ECR registry for OMES images (mirrored from Docker Hub via skopeo)
@@ -15,3 +15,13 @@ set -x auth_method "api_key"
 
 # API gateway endpoint (used with api_key auth)
 set -x api_gateway "us-west-2.aws.api.tmprl-test.cloud:7233"
+
+# Worker tuning (passed to run-worker with worker- prefix)
+set -x max_concurrent_activity_pollers 40
+set -x max_concurrent_activities 500
+
+# Executor load parameters
+set -x duration "1h"
+set -x max_concurrent 500
+set -x max_iterations_per_second 500
+set -x scenario_options "--option payload-size=102400"
diff --git a/deploy-omes/patch-worker.fish b/deploy-omes/patch-worker.fish
index da588bdf..3acee664 100755
--- a/deploy-omes/patch-worker.fish
+++ b/deploy-omes/patch-worker.fish
@@ -49,7 +49,9 @@ if test "$auth_method" = "api_key"
         \"--server-address=$server_address\",
         \"--tls\",
         \"--disable-tls-host-verification\",
-        \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\"
+        \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\",
+        \"--worker-max-concurrent-activity-pollers=$max_concurrent_activity_pollers\",
+        \"--worker-max-concurrent-activities=$max_concurrent_activities\"
       ] |
       .spec.template.spec.containers[0].env = [
         {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
diff --git a/deploy-omes/run-executor.fish b/deploy-omes/run-executor.fish
index 54c79963..2869fecb 100755
--- a/deploy-omes/run-executor.fish
+++ b/deploy-omes/run-executor.fish
@@ -2,8 +2,6 @@
 
 # Runs the OMES scenario executor as a Kubernetes Job.
 # Deletes any previous run, recreates from template, and follows logs.
-# Usage: run-executor.fish [duration]
-# Default duration: 600s
 
 set script_dir (dirname (status --current-filename))
 
@@ -21,60 +19,62 @@ for var in cell ns runid scenario omes_image_tag omes_ecr_registry auth_method
     end
 end
 
-set -l duration "600s"
-if test (count $argv) -gt 0
-    set duration $argv[1]
-end
-
 set -l job_name "omes-executor"
 set -l namespace_fqdn "$ns.temporal-dev"
 set -l image "$omes_ecr_registry:$omes_image_tag"
+set -l tmpfile (mktemp /tmp/omes-executor.XXXXXX.yaml)
 
 if test "$auth_method" = "api_key"
     set -l server_address "$api_gateway"
-    set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
-         .spec.template.spec.containers[0].args = [
-           \"run-scenario\",
-           \"--scenario=$scenario\",
-           \"--run-id=cicd-go-$runid\",
-           \"--namespace=$namespace_fqdn\",
-           \"--server-address=$server_address\",
-           \"--tls\",
-           \"--disable-tls-host-verification\",
-           \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\",
-           \"--duration=$duration\",
-           \"--do-not-register-search-attributes\"
-         ] |
-         .spec.template.spec.containers[0].env = [
-           {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
-         ] |
-         del(.spec.template.spec.volumes) |
-         del(.spec.template.spec.containers[0].volumeMounts)"
+    yq eval "
+      .spec.template.spec.containers[0].image = \"$image\" |
+      .spec.template.spec.containers[0].args = [
+        \"run-scenario\",
+        \"--scenario=$scenario\",
+        \"--run-id=cicd-go-$runid\",
+        \"--namespace=$namespace_fqdn\",
+        \"--server-address=$server_address\",
+        \"--tls\",
+        \"--disable-tls-host-verification\",
+        \"--auth-header=Bearer \$(TEMPORAL_API_KEY)\",
+        \"--duration=$duration\",
+        \"--max-concurrent=$max_concurrent\",
+        \"--max-iterations-per-second=$max_iterations_per_second\",
+        \"--option\", \"payload-size=102400\",
+        \"--do-not-register-search-attributes\"
+      ] |
+      .spec.template.spec.containers[0].env = [
+        {\"name\": \"TEMPORAL_API_KEY\", \"valueFrom\": {\"secretKeyRef\": {\"name\": \"omes-api-key\", \"key\": \"api-key\"}}}
+      ] |
+      del(.spec.template.spec.volumes) |
+      del(.spec.template.spec.containers[0].volumeMounts)
+    " "$script_dir/executor-job.yaml" > $tmpfile
 else
     set -l deployment_name "omes-$ns-temporal-dev-omes-worker"
     set -l server_address "$namespace_fqdn.tmprl-test.cloud:7233"
     set -l secret_name "$deployment_name"
-    set -l yq_expr ".spec.template.spec.containers[0].image = \"$image\" |
-         .spec.template.spec.containers[0].args = [
-           \"run-scenario\",
-           \"--scenario=$scenario\",
-           \"--run-id=cicd-go-$runid\",
-           \"--namespace=$namespace_fqdn\",
-           \"--server-address=$server_address\",
-           \"--disable-tls-host-verification\",
-           \"--tls\",
-           \"--tls-cert-path=/certs/tls.crt\",
-           \"--tls-key-path=/certs/tls.key\",
-           \"--duration=$duration\",
-           \"--do-not-register-search-attributes\"
-         ] |
-         .spec.template.spec.volumes[0].secret.secretName = \"$secret_name\""
+    yq eval "
+      .spec.template.spec.containers[0].image = \"$image\" |
+      .spec.template.spec.containers[0].args = [
+        \"run-scenario\",
+        \"--scenario=$scenario\",
+        \"--run-id=cicd-go-$runid\",
+        \"--namespace=$namespace_fqdn\",
+        \"--server-address=$server_address\",
+        \"--disable-tls-host-verification\",
+        \"--tls\",
+        \"--tls-cert-path=/certs/tls.crt\",
+        \"--tls-key-path=/certs/tls.key\",
+        \"--duration=$duration\",
+        \"--max-concurrent=$max_concurrent\",
+        \"--max-iterations-per-second=$max_iterations_per_second\",
+        \"--option\", \"payload-size=102400\",
+        \"--do-not-register-search-attributes\"
+      ] |
+      .spec.template.spec.volumes[0].secret.secretName = \"$secret_name\"
+    " "$script_dir/executor-job.yaml" > $tmpfile
 end
 
-# Write rendered yaml to a temp file to preserve formatting
-set -l tmpfile (mktemp /tmp/omes-executor.XXXXXX.yaml)
-yq eval "$yq_expr" "$script_dir/executor-job.yaml" > $tmpfile
-
 cat $tmpfile
 echo ""
 echo "Run executor? (y/n)"
@@ -86,8 +86,8 @@ if test "$confirm" = "y" -o "$confirm" = "Y"
     rm -f $tmpfile
     echo ""
     echo "Job started. Useful commands:"
-    echo "  omni kubectl --context $cell logs -f job/$job_name -n omes"
-    echo "  omni kubectl --context $cell get job $job_name -n omes"
+    echo "  ct kubectl --context $cell logs -f job/$job_name -n omes"
+    echo "  ct kubectl --context $cell get job $job_name -n omes"
 else
     rm -f $tmpfile
     echo "Cancelled."
diff --git a/scenarios/standalone_activity.go b/scenarios/standalone_activity.go
index ae301394..f052b9ce 100644
--- a/scenarios/standalone_activity.go
+++ b/scenarios/standalone_activity.go
@@ -54,7 +54,7 @@ func activityOptions(r *loadgen.Run, failForAttempts int32) client.StartActivity
 			r.Iteration,
 		),
 		TaskQueue:           r.TaskQueue(),
-		StartToCloseTimeout:    5 * time.Second,
+		StartToCloseTimeout:    30 * time.Second,
 		ScheduleToCloseTimeout: 60 * time.Second,
 		RetryPolicy: &temporal.RetryPolicy{
 			MaximumAttempts:    failForAttempts + 1,

From 838ef3b8823cc9ea62e3a6a394504d7c79b0ed67 Mon Sep 17 00:00:00 2001
From: Dan Davison <dan.davison@temporal.io>
Date: Wed, 25 Mar 2026 23:21:19 -0400
Subject: [PATCH 40/40] Make activity timeouts configurable via scenario
 options

SAA (standalone activity) scenario options, with defaults:
  - start-to-close-timeout-seconds (30)
  - schedule-to-close-timeout-seconds (120; was hard-coded to 60)
  - get-timeout-seconds (120; was hard-coded to 60)

Also bump the activity StartToCloseTimeout in the SAW
(SingleActivityWorkflow) workflow from 5s to 30s.
---
 scenarios/standalone_activity.go              | 10 ++++++----
 workers/go/singleactivityworkflow/workflow.go |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/scenarios/standalone_activity.go b/scenarios/standalone_activity.go
index f052b9ce..145ddd42 100644
--- a/scenarios/standalone_activity.go
+++ b/scenarios/standalone_activity.go
@@ -36,7 +36,7 @@ func init() {
 					if err != nil {
 						return err
 					}
-					getCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
+					getCtx, cancel := context.WithTimeout(ctx, time.Duration(r.ScenarioOptionInt("get-timeout-seconds", 120))*time.Second)
 					defer cancel()
 					return handle.Get(getCtx, nil)
 				},
@@ -46,6 +46,8 @@ func init() {
 }
 
 func activityOptions(r *loadgen.Run, failForAttempts int32) client.StartActivityOptions {
+	startToClose := time.Duration(r.ScenarioOptionInt("start-to-close-timeout-seconds", 30)) * time.Second
+	scheduleToClose := time.Duration(r.ScenarioOptionInt("schedule-to-close-timeout-seconds", 120)) * time.Second
 	return client.StartActivityOptions{
 		ID: fmt.Sprintf(
 			"a-%s-%s-%d",
@@ -53,9 +55,9 @@ func activityOptions(r *loadgen.Run, failForAttempts int32) client.StartActivity
 			r.ExecutionID,
 			r.Iteration,
 		),
-		TaskQueue:           r.TaskQueue(),
-		StartToCloseTimeout:    30 * time.Second,
-		ScheduleToCloseTimeout: 60 * time.Second,
+		TaskQueue:              r.TaskQueue(),
+		StartToCloseTimeout:    startToClose,
+		ScheduleToCloseTimeout: scheduleToClose,
 		RetryPolicy: &temporal.RetryPolicy{
 			MaximumAttempts:    failForAttempts + 1,
 			InitialInterval:    1 * time.Millisecond,
diff --git a/workers/go/singleactivityworkflow/workflow.go b/workers/go/singleactivityworkflow/workflow.go
index 7579326d..21aa19e0 100644
--- a/workers/go/singleactivityworkflow/workflow.go
+++ b/workers/go/singleactivityworkflow/workflow.go
@@ -18,7 +18,7 @@ func SingleActivityWorkflow(ctx workflow.Context, input []byte, outputSize int32
 	}
 	var output []byte
 	err := workflow.ExecuteActivity(workflow.WithActivityOptions(ctx, workflow.ActivityOptions{
-		StartToCloseTimeout: 5 * time.Second,
+		StartToCloseTimeout: 30 * time.Second,
 		RetryPolicy: &temporal.RetryPolicy{
 			MaximumAttempts:    failForAttempts + 1,
 			InitialInterval:    1 * time.Millisecond,