diff --git a/datadog_checks_base/changelog.d/23530.added b/datadog_checks_base/changelog.d/23530.added new file mode 100644 index 0000000000000..07aeeb425b1fe --- /dev/null +++ b/datadog_checks_base/changelog.d/23530.added @@ -0,0 +1 @@ +Add a per-performance-object `include_total` option (default `false`) to the Windows perf-counter framework. When set to `true`, the `_Total` aggregate instance is collected instead of being excluded by default. diff --git a/datadog_checks_base/datadog_checks/base/checks/windows/perf_counters/counter.py b/datadog_checks_base/datadog_checks/base/checks/windows/perf_counters/counter.py index 553a92b9cfe33..d2d11b411f9d8 100644 --- a/datadog_checks_base/datadog_checks/base/checks/windows/perf_counters/counter.py +++ b/datadog_checks_base/datadog_checks/base/checks/windows/perf_counters/counter.py @@ -55,6 +55,16 @@ def __init__(self, check, connection, name, config, use_localized_counters, tags # See: https://learn.microsoft.com/en-us/windows/win32/perfctrs/about-performance-counters self.include_pattern = re.compile('|'.join(include_patterns), re.IGNORECASE) + # Opt in to collect the `_Total` aggregate instance, which is excluded by default because + # it is usually derivable from the per-instance values. Some perf objects (e.g. + # `MSExchangeTransport Queues`) report data on `_Total` that is not the sum of the + # visible instances, in which case excluding it loses information. + include_total = config.get('include_total', False) + if not isinstance(include_total, bool): + raise ConfigTypeError( + f'Option `include_total` for performance object `{self.name}` must be a boolean' + ) + # List of regex patterns to filter multi-instance counters AFTER ALL data # is collected and retrieved from PDH layer exclude_patterns = config.get('exclude', []) @@ -67,11 +77,13 @@ def __init__(self, check, connection, name, config, use_localized_counters, tags f'Pattern #{i} of option `exclude` for performance object `{self.name}` must be a string' ) - final_exclude_patterns = [r'\b_Total\b'] + final_exclude_patterns = [] if include_total else [r'\b_Total\b'] final_exclude_patterns.extend(exclude_patterns) + # `(?!)` is a never-matching pattern, used when there is nothing to exclude so that + # `self.exclude_pattern.search(...)` always returns None without special-casing. # Instance names are not case-sensitive, so instances should not have names that differ only in case. # See: https://learn.microsoft.com/en-us/windows/win32/perfctrs/about-performance-counters - self.exclude_pattern = re.compile('|'.join(final_exclude_patterns), re.IGNORECASE) + self.exclude_pattern = re.compile('|'.join(final_exclude_patterns) or r'(?!)', re.IGNORECASE) # List of wildcards or instance name directly to filter multi-instance counters by PDH layer itself. # Thus it is faster and and less resource intensive than regex-based include filtering. diff --git a/datadog_checks_base/tests/base/checks/windows/perf_counters/test_filter.py b/datadog_checks_base/tests/base/checks/windows/perf_counters/test_filter.py index bd6d2058f53cb..80124360db242 100644 --- a/datadog_checks_base/tests/base/checks/windows/perf_counters/test_filter.py +++ b/datadog_checks_base/tests/base/checks/windows/perf_counters/test_filter.py @@ -75,3 +75,62 @@ def test_include_case_insensitive(aggregator, dd_run_check, mock_performance_obj aggregator.assert_metric_has_tag('test.foo.bar', 'instance:Barbat', count=0) aggregator.assert_all_metrics_covered() + + +def test_include_total(aggregator, dd_run_check, mock_performance_objects): + mock_performance_objects({'Foo': (['_Total', 'baz'], {'Bar': [1, 2]})}) + check = get_check( + {'metrics': {'Foo': {'name': 'foo', 'include_total': True, 'counters': [{'Bar': 'bar'}]}}} + ) + dd_run_check(check) + + tags = ['instance:_Total'] + tags.extend(GLOBAL_TAGS) + aggregator.assert_metric('test.foo.bar', 1, tags=tags) + + tags = ['instance:baz'] + tags.extend(GLOBAL_TAGS) + aggregator.assert_metric('test.foo.bar', 2, tags=tags) + + aggregator.assert_all_metrics_covered() + + +def test_include_total_with_lowercase_instance(aggregator, dd_run_check, mock_performance_objects): + mock_performance_objects({'Foo': (['_total', 'baz'], {'Bar': [1, 2]})}) + check = get_check( + {'metrics': {'Foo': {'name': 'foo', 'include_total': True, 'counters': [{'Bar': 'bar'}]}}} + ) + dd_run_check(check) + + tags = ['instance:_total'] + tags.extend(GLOBAL_TAGS) + aggregator.assert_metric('test.foo.bar', 1, tags=tags) + + aggregator.assert_metric_has_tag('test.foo.bar', 'instance:baz', count=1) + + aggregator.assert_all_metrics_covered() + + +def test_include_total_respects_user_exclude(aggregator, dd_run_check, mock_performance_objects): + mock_performance_objects({'Foo': (['_Total', 'baz'], {'Bar': [1, 2]})}) + check = get_check( + { + 'metrics': { + 'Foo': { + 'name': 'foo', + 'include_total': True, + 'exclude': ['baz'], + 'counters': [{'Bar': 'bar'}], + } + } + } + ) + dd_run_check(check) + + tags = ['instance:_Total'] + tags.extend(GLOBAL_TAGS) + aggregator.assert_metric('test.foo.bar', 1, tags=tags) + + aggregator.assert_metric_has_tag('test.foo.bar', 'instance:baz', count=0) + + aggregator.assert_all_metrics_covered() diff --git a/datadog_checks_dev/datadog_checks/dev/tooling/templates/configuration/common/perf_counters.yaml b/datadog_checks_dev/datadog_checks/dev/tooling/templates/configuration/common/perf_counters.yaml index d5b5a59f6e617..907d772c65e18 100644 --- a/datadog_checks_dev/datadog_checks/dev/tooling/templates/configuration/common/perf_counters.yaml +++ b/datadog_checks_dev/datadog_checks/dev/tooling/templates/configuration/common/perf_counters.yaml @@ -26,7 +26,15 @@ include: This is the list of regular expressions used to select which instances to monitor. If not set, all instances are monitored. exclude: This is the list of regular expressions used to select which instances to ignore. - If not set, no instances are ignored. Note: `_Total` instances are always ignored. + If not set, no instances are ignored. Note: `_Total` instances are ignored by default; + set `include_total` to `true` to collect them. + include_total: Whether to collect the `_Total` aggregate instance for this performance object. + Defaults to `false`. Most performance objects report `_Total` as the sum of the + individual instances, in which case it is preferable to compute the total in + Datadog. However, some perf objects (e.g. `MSExchangeTransport Queues`) report + data on `_Total` that is not derivable from the visible instances, and in that + case opting in restores the missing data without forcing the collection of every + individual instance. include_fast: This is the list of wildcards or exact instance names used to select which instances to monitor. It is faster than the regular expression `include` filter because it relies on the Windows PDH built-in wildcard filtering. @@ -95,6 +103,8 @@ type: array items: type: string + - name: include_total + type: boolean - name: instance_counts type: object properties: