From b70f9c217014a5cc8d5d23b3e8fa3a548249567e Mon Sep 17 00:00:00 2001 From: zanejohnson-azure Date: Wed, 22 Apr 2026 13:40:16 -0700 Subject: [PATCH 1/4] Rename telegraf process-metrics instance to telegraf-process-metrics Give the telegraf instance that collects process metrics a distinct process name so it can be distinguished from the main telegraf process. - Linux: create symlink /opt/telegraf-process-metrics -> /opt/telegraf - Windows: create symlink telegraf-process-metrics.exe -> telegraf.exe - Both configs: add new procstat entry for telegraf-process-metrics while keeping the existing telegraf entry --- .../conf/telegraf-ama-logs-process-metrics.conf | 15 +++++++++++++++ .../conf/telegraf-ama-logs-process-metrics.conf | 15 +++++++++++++++ kubernetes/linux/main.sh | 4 +++- kubernetes/windows/main.ps1 | 6 ++++-- 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf index 79c7bf941..7ab182f23 100644 --- a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf +++ b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf @@ -62,6 +62,21 @@ ControllerType = "$CONTROLLER_TYPE" AksResourceId = "$AKS_RESOURCE_ID" +[[inputs.procstat]] + name_prefix = "t.azm.ms/" + exe = "telegraf-process-metrics" + interval = "60s" + pid_finder = "native" + pid_tag = true + name_override = "agent_telemetry" + fieldpass = ["cpu_usage", "memory_rss"] + [inputs.procstat.tags] + Computer = "placeholder_hostname" + PodName = "placeholder_podname" + AgentVersion = "$AGENT_VERSION" + ControllerType = "$CONTROLLER_TYPE" + AksResourceId = "$AKS_RESOURCE_ID" + [[inputs.procstat]] name_prefix = "t.azm.ms/" exe = "ruby" diff --git a/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf b/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf index 0acebd90f..eede99f6d 100644 --- 
a/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf +++ b/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf @@ -45,6 +45,21 @@ ControllerType = "DaemonSet-Windows" AksResourceId = "placeholder_aksresourceid" +[[inputs.procstat]] + name_prefix = "t.azm.ms/" + exe = "telegraf-process-metrics" + interval = "60s" + pid_finder = "native" + pid_tag = true + name_override = "agent_telemetry" + fieldpass = ["cpu_usage", "memory_rss"] + [inputs.procstat.tags] + Computer = "placeholder_hostname" + PodName = "placeholder_podname" + AgentVersion = "$AGENT_VERSION" + ControllerType = "DaemonSet-Windows" + AksResourceId = "placeholder_aksresourceid" + [[inputs.procstat]] name_prefix = "t.azm.ms/" exe = "MonAgentLauncher" diff --git a/kubernetes/linux/main.sh b/kubernetes/linux/main.sh index 864797c04..7747a679f 100644 --- a/kubernetes/linux/main.sh +++ b/kubernetes/linux/main.sh @@ -1413,7 +1413,9 @@ if [ "${AZMON_COLLECT_AMA_LOGS_PROCESS_METRICS}" == "true" ]; then appinsightsKey=$(echo "$APPLICATIONINSIGHTS_AUTH" | base64 -d | tr -d '\n') sed -i -e "s/placeholder_appinsights_key/$appinsightsKey/g" $amaLogsProcessMetricsConfFile # Use /proc so telegraf only collect process metrics inside ama-logs containers. 
- HOST_PROC=/proc /opt/telegraf --non-strict-env-handling --config $amaLogsProcessMetricsConfFile & + # Create symlink to give this telegraf instance a distinct process name + ln -sf /opt/telegraf /opt/telegraf-process-metrics + HOST_PROC=/proc /opt/telegraf-process-metrics --non-strict-env-handling --config $amaLogsProcessMetricsConfFile & else echo "APPLICATIONINSIGHTS_AUTH or AKS_RESOURCE_ID not set, skipping ama-logs process metrics monitoring" fi diff --git a/kubernetes/windows/main.ps1 b/kubernetes/windows/main.ps1 index cb30f4234..abc61a5a0 100644 --- a/kubernetes/windows/main.ps1 +++ b/kubernetes/windows/main.ps1 @@ -883,8 +883,10 @@ function Start-Fluent-Telegraf { $appInsightsKey = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String($appInsightsAuth)).Trim() (Get-Content $amaLogsProcessMetricsConfFile).replace('placeholder_appinsights_key', $appInsightsKey) | Set-Content $amaLogsProcessMetricsConfFile Write-Host "Starting telegraf for collecting process metrics inside ama-logs containers (Windows)" - C:\opt\telegraf\telegraf.exe --service install --service-name telegraf-ama-logs-process-metrics --config $amaLogsProcessMetricsConfFile - C:\opt\telegraf\telegraf.exe --service start --service-name telegraf-ama-logs-process-metrics + # Create symlink to give this telegraf instance a distinct process name + New-Item -ItemType SymbolicLink -Path "C:\opt\telegraf\telegraf-process-metrics.exe" -Target "C:\opt\telegraf\telegraf.exe" -Force + C:\opt\telegraf\telegraf-process-metrics.exe --service install --service-name telegraf-ama-logs-process-metrics --config $amaLogsProcessMetricsConfFile + C:\opt\telegraf\telegraf-process-metrics.exe --service start --service-name telegraf-ama-logs-process-metrics } else { Write-Host "APPLICATIONINSIGHTS_AUTH or AKS_RESOURCE_ID not set, skipping ama-logs process metrics monitoring" } From a43cc2dbede9db9f244b7c0e4df4bfa6a08726da Mon Sep 17 00:00:00 2001 From: zanejohnson-azure Date: Wed, 22 Apr 2026 
15:11:21 -0700 Subject: [PATCH 2/4] fix: use pattern instead of exe for procstat process matching /proc/[pid]/exe resolves symlinks, so both telegraf processes appear as exe="telegraf". Switch to pattern-based cmdline matching: - Linux: "telegraf --" for main, "telegraf-process-metrics" for 2nd - Windows: "telegraf.exe --" for main, "telegraf-process-metrics" for 2nd --- .../installer/conf/telegraf-ama-logs-process-metrics.conf | 4 ++-- .../installer/conf/telegraf-ama-logs-process-metrics.conf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf index 7ab182f23..2aa06931a 100644 --- a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf +++ b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf @@ -49,7 +49,7 @@ [[inputs.procstat]] name_prefix = "t.azm.ms/" - exe = "telegraf" + pattern = "telegraf --" interval = "60s" pid_finder = "native" pid_tag = true @@ -64,7 +64,7 @@ [[inputs.procstat]] name_prefix = "t.azm.ms/" - exe = "telegraf-process-metrics" + pattern = "telegraf-process-metrics" interval = "60s" pid_finder = "native" pid_tag = true diff --git a/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf b/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf index eede99f6d..238fddb4a 100644 --- a/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf +++ b/build/windows/installer/conf/telegraf-ama-logs-process-metrics.conf @@ -32,7 +32,7 @@ [[inputs.procstat]] name_prefix = "t.azm.ms/" - exe = "telegraf" + pattern = "telegraf.exe --" interval = "60s" pid_finder = "native" pid_tag = true @@ -47,7 +47,7 @@ [[inputs.procstat]] name_prefix = "t.azm.ms/" - exe = "telegraf-process-metrics" + pattern = "telegraf-process-metrics" interval = "60s" pid_finder = "native" pid_tag = true From bc38d8c359f34f3b54486e2fce41a8cf7b571738 Mon Sep 17 00:00:00 2001 
From: zanejohnson-azure Date: Fri, 24 Apr 2026 11:54:48 -0700 Subject: [PATCH 3/4] fix: change fluentd procstat from pattern to exe matching Change `pattern = "fluentd"` to `exe = "fluentd"` so it only matches the fluentd supervisor process (PID name=fluentd), not both supervisor and worker. The ruby worker is already tracked by `exe = "ruby"`. This fixes duplicate values showing for ruby and fluentd on the dashboard. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../linux/installer/conf/telegraf-ama-logs-process-metrics.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf index 2aa06931a..a42a3432e 100644 --- a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf +++ b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf @@ -124,7 +124,7 @@ [[inputs.procstat]] name_prefix = "t.azm.ms/" - pattern = "fluentd" + exe = "fluentd" interval = "60s" pid_finder = "native" pid_tag = true From a26d0541071f6d280e0cc2d6e50b377824715cdc Mon Sep 17 00:00:00 2001 From: zanejohnson-azure Date: Fri, 24 Apr 2026 15:52:24 -0700 Subject: [PATCH 4/4] Revert "fix: change fluentd procstat from pattern to exe matching" This reverts commit bc38d8c359f34f3b54486e2fce41a8cf7b571738. --- .../linux/installer/conf/telegraf-ama-logs-process-metrics.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf index a42a3432e..2aa06931a 100644 --- a/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf +++ b/build/linux/installer/conf/telegraf-ama-logs-process-metrics.conf @@ -124,7 +124,7 @@ [[inputs.procstat]] name_prefix = "t.azm.ms/" - exe = "fluentd" + pattern = "fluentd" interval = "60s" pid_finder = "native" pid_tag = true