From 793a5ac0de9fbde4851f4336740dd491e8c7c141 Mon Sep 17 00:00:00 2001 From: entlein Date: Fri, 1 May 2026 18:43:34 +0200 Subject: [PATCH 1/2] addressing the tests, the now more defensive guards for empty, more explicit tests and behavior for empty versus wildcard, restores the benchmark test and addresses the nitpick comments - does not include the performance issue re the zero-alloc Signed-off-by: entlein --- .../file/applicationprofile_processor.go | 2 +- .../file/applicationprofile_processor_test.go | 12 +- .../file/containerprofile_processor.go | 2 +- .../dynamicpathdetector/analyze_endpoints.go | 29 ++- .../analyze_endpoints_internal_test.go | 55 +++++ .../file/dynamicpathdetector/analyzer.go | 81 +++++-- .../tests/benchmark_test.go | 46 +++- .../tests/coverage_test.go | 227 ++++++++++++++++++ .../dynamicpathdetector/tests/profile_test.go | 35 ++- .../file/dynamicpathdetector/types.go | 16 +- 10 files changed, 454 insertions(+), 51 deletions(-) create mode 100644 pkg/registry/file/dynamicpathdetector/analyze_endpoints_internal_test.go diff --git a/pkg/registry/file/applicationprofile_processor.go b/pkg/registry/file/applicationprofile_processor.go index e823fe4a9..ee0509eb6 100644 --- a/pkg/registry/file/applicationprofile_processor.go +++ b/pkg/registry/file/applicationprofile_processor.go @@ -107,7 +107,7 @@ func (a *ApplicationProfileProcessor) SetStorage(containerProfileStorage Contain } func deflateApplicationProfileContainer(container softwarecomposition.ApplicationProfileContainer, sbomSet mapset.Set[string]) softwarecomposition.ApplicationProfileContainer { - opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs), sbomSet) + opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs()), sbomSet) if err != nil { logger.L().Debug("falling back to DeflateStringer for opens", loggerhelpers.Error(err)) opens = DeflateStringer(container.Opens) diff --git a/pkg/registry/file/applicationprofile_processor_test.go b/pkg/registry/file/applicationprofile_processor_test.go index e09ed0e15..62574f116 100644 --- a/pkg/registry/file/applicationprofile_processor_test.go +++ b/pkg/registry/file/applicationprofile_processor_test.go @@ -344,8 +344,16 @@ func TestDeflateApplicationProfileContainer_MixedPathsCollapse(t *testing.T) { }) } - // /etc uses the /etc config threshold from DefaultCollapseConfigs (100) - etcThreshold := 100 + // /etc uses the /etc config threshold from DefaultCollapseConfigs. + // Derive from the live config so this test stays in sync if the + // production threshold for /etc ever changes — hardcoding 100 here + // previously meant the test would silently pass even when + // DefaultCollapseConfigs drifted (CodeRabbit C5). + etcAnalyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs( + dynamicpathdetector.OpenDynamicThreshold, + dynamicpathdetector.DefaultCollapseConfigs(), + ) + etcThreshold := etcAnalyzer.FindConfigForPath("/etc/file").Threshold for i := 0; i < etcThreshold+1; i++ { opens = append(opens, softwarecomposition.OpenCalls{ Path: fmt.Sprintf("/etc/conf%d.cfg", i), diff --git a/pkg/registry/file/containerprofile_processor.go b/pkg/registry/file/containerprofile_processor.go index b8df5897a..29077b338 100644 --- a/pkg/registry/file/containerprofile_processor.go +++ b/pkg/registry/file/containerprofile_processor.go @@ -743,7 +743,7 @@ func (a *ContainerProfileProcessor) getAggregatedData(ctx context.Context, key s } func DeflateContainerProfileSpec(container softwarecomposition.ContainerProfileSpec, sbomSet mapset.Set[string]) softwarecomposition.ContainerProfileSpec { - opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs), sbomSet) + opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs()), sbomSet) if err != nil { logger.L().Debug("ContainerProfileProcessor.deflateContainerProfileSpec - falling back to DeflateStringer for opens", loggerhelpers.Error(err)) opens = DeflateStringer(container.Opens) diff --git a/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go b/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go index 721406ecf..278a158e5 100644 --- a/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go +++ b/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go @@ -7,6 +7,8 @@ import ( "strings" mapset "github.com/deckarep/golang-set/v2" + "github.com/kubescape/go-logger" + loggerhelpers "github.com/kubescape/go-logger/helpers" types "github.com/kubescape/storage/pkg/apis/softwarecomposition" ) @@ -99,8 +101,26 @@ func AnalyzeURL(urlString string, analyzer *PathAnalyzer) (string, error) { return ":" + port + path, nil } +// splitEndpointPortAndPath splits the canonical `:` form +// produced by AnalyzeURL into its (port, path) parts. +// +// Defensive contract: AnalyzeURL guarantees a leading `:` and a port +// segment, but callers and tests sometimes pass bare paths (e.g. +// "/health") for ad-hoc lookups. To keep merge keys deterministic, +// this helper returns empty port + leading-slash-normalised path for +// any input that does not start with `:`. The empty string returns +// ("", "/") to match the original fall-through behavior. func splitEndpointPortAndPath(endpoint string) (string, string) { - s := strings.TrimPrefix(endpoint, ":") + if !strings.HasPrefix(endpoint, ":") { + if endpoint == "" { + return "", "/" + } + if !strings.HasPrefix(endpoint, "/") { + endpoint = "/" + endpoint + } + return "", endpoint + } + s := endpoint[1:] idx := strings.Index(s, "/") if idx == -1 { return s, "/" @@ -220,7 +240,12 @@ func mergeHeaders(existing, new *types.HTTPEndpoint) { rawJSON, err := json.Marshal(existingHeaders) if err != nil { - fmt.Println("Error marshaling JSON:", err) + // Don't pollute stdout from a library function. The caller has + // no signal-back path here (mergeHeaders is a void helper) so + // log at Debug and bail — leaving Headers untouched is the + // safer choice than corrupting them with a partial marshal. + logger.L().Debug("mergeHeaders: failed to marshal merged headers, leaving existing untouched", + loggerhelpers.Error(err)) return } diff --git a/pkg/registry/file/dynamicpathdetector/analyze_endpoints_internal_test.go b/pkg/registry/file/dynamicpathdetector/analyze_endpoints_internal_test.go new file mode 100644 index 000000000..b10fe21eb --- /dev/null +++ b/pkg/registry/file/dynamicpathdetector/analyze_endpoints_internal_test.go @@ -0,0 +1,55 @@ +package dynamicpathdetector + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestSplitEndpointPortAndPath_DefensiveContract pins the inputs that +// AnalyzeURL is supposed to produce (`:`) AND the defensive +// behavior for bare-path / empty / no-leading-slash inputs that may +// arrive via ad-hoc lookups or tests. Without the guard, "foo" was +// returned as ("foo", "/") — silently treating an opaque token as a +// port number. Flagged on upstream PR #316 by CodeRabbit (C7). +func TestSplitEndpointPortAndPath_DefensiveContract(t *testing.T) { + tests := []struct { + name string + input string + wantPort string + wantPath string + }{ + // Canonical AnalyzeURL output. + {"empty", "", "", "/"}, + {"port_only", ":80", "80", "/"}, + {"port_with_root_path", ":80/", "80", "/"}, + {"port_with_path", ":80/health", "80", "/health"}, + {"wildcard_port", ":0", "0", "/"}, + {"wildcard_port_with_path", ":0/api/users", "0", "/api/users"}, + {"port_with_deep_path", ":443/v1/items/42", "443", "/v1/items/42"}, + + // Defensive — bare paths arriving without the `:` prefix. + {"bare_path", "/health", "", "/health"}, + {"bare_root", "/", "", "/"}, + {"bare_deep_path", "/v1/items/42", "", "/v1/items/42"}, + + // Defensive — opaque token without a leading slash. Previous + // behavior silently returned ("foo", "/") which would be + // indistinguishable from port="foo". The guard normalises this + // to ("", "/foo"). + {"opaque_token", "foo", "", "/foo"}, + {"opaque_with_dot", "host.example.com", "", "/host.example.com"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotPort, gotPath := splitEndpointPortAndPath(tt.input) + assert.Equal(t, tt.wantPort, gotPort, + "splitEndpointPortAndPath(%q) port = %q, want %q", + tt.input, gotPort, tt.wantPort) + assert.Equal(t, tt.wantPath, gotPath, + "splitEndpointPortAndPath(%q) path = %q, want %q", + tt.input, gotPath, tt.wantPath) + }) + } +} diff --git a/pkg/registry/file/dynamicpathdetector/analyzer.go b/pkg/registry/file/dynamicpathdetector/analyzer.go index 9a0b517bc..74e31aca4 100644 --- a/pkg/registry/file/dynamicpathdetector/analyzer.go +++ b/pkg/registry/file/dynamicpathdetector/analyzer.go @@ -347,17 +347,51 @@ func shallowChildrenCopy(src, dst *SegmentNode) { } } -// CompareDynamic checks whether `regularPath` is matched by `dynamicPath`, -// where `dynamicPath` may contain DynamicIdentifier (⋯, single-segment -// wildcard) or WildcardIdentifier (*, zero-or-more-segment wildcard). -// The previous implementation only handled DynamicIdentifier, causing -// explicit `/etc/*` profile entries to never match at runtime — the -// node-agent R0002 rule (Files Access Anomalies) uses this to decide -// whether a file access is in-profile. +// CompareDynamic checks whether `regularPath` is matched by `dynamicPath`. +// The dynamic path may contain DynamicIdentifier (⋯, exactly-one-segment +// wildcard) or WildcardIdentifier (*, zero-or-more-segment mid-path / +// one-or-more-segment trailing wildcard). The node-agent R0002 rule +// (Files Access Anomalies) uses this at every file-open to decide whether +// the access is in-profile. +// +// Anchoring contract: +// - Anchored patterns (start with `/`): `/etc/*` matches files UNDER +// /etc but NOT the bare `/etc` directory itself, mirroring shell +// glob semantics. This avoids R0002 silently allowing access to a +// profiled directory's parent. +// - Unanchored `*` (no leading slash): explicit catch-all that also +// matches the root path `/`. The only way to whitelist `/` itself +// is an explicit unanchored `*`. +// +// Trailing-slash insensitivity: `/etc/` is treated as `/etc`, and +// `/etc/passwd/` as `/etc/passwd`. Trailing empty path components from +// `strings.Split` are trimmed so `len(regular) > 0` correctly reflects +// the presence of a real path tail when matching trailing `*`. +// +// The empty regular path (`""`) is treated as "no path" and matches +// nothing — distinct from the root path `/`, which matches unanchored +// `*` per the contract above. func CompareDynamic(dynamicPath, regularPath string) bool { - dynamicSegments := strings.Split(dynamicPath, "/") - regularSegments := strings.Split(regularPath, "/") - return compareSegments(dynamicSegments, regularSegments) + // Empty inputs match nothing. Note that splitPath("") and splitPath("/") + // both yield [""] after trim, so without this guard an empty profile + // entry would silently match the root path. + if dynamicPath == "" || regularPath == "" { + return false + } + return compareSegments(splitPath(dynamicPath), splitPath(regularPath)) +} + +// splitPath splits a path on `/` and trims trailing empty segments +// produced by trailing slashes (e.g. `/etc/` -> ["", "etc"] not +// ["", "etc", ""]). The leading empty segment from a leading slash is +// preserved as the anchor marker. Single-element results are not +// trimmed so the root path `/` retains its `[""]` shape. +func splitPath(p string) []string { + s := strings.Split(p, "/") + for len(s) > 1 && s[len(s)-1] == "" { + s = s[:len(s)-1] + } + return s } func compareSegments(dynamic, regular []string) bool { @@ -365,20 +399,23 @@ func compareSegments(dynamic, regular []string) bool { return len(regular) == 0 } if dynamic[0] == WildcardIdentifier { - // A trailing `*` matches any remaining path tail (including empty). + // Trailing `*` matches one OR MORE remaining segments — never + // zero. This is what makes `/etc/*` not match the bare `/etc` + // directory, while still matching `/etc/passwd` and any deeper + // path. The unanchored-`*` case (regular path is `/`, regular + // slice is [""]) returns true because len(regular) == 1. if len(dynamic) == 1 { - return true + return len(regular) > 0 } - // Try to match the rest of the dynamic pattern starting at every - // position in the regular tail — including i == 0 (the wildcard - // consumed zero segments) and every later offset (wildcard - // consumed i segments). No optimistic peek at dynamic[1]: that - // optimization used to require regular[i] to literally equal - // dynamic[1], which is wrong whenever dynamic[1] is itself - // another `*` (consecutive wildcards like `/*/*` would never - // recurse and thus never match — user-authored profiles can - // contain literal /*/* patterns even though analyzer-generated - // ones are squashed by collapseAdjacentDynamicIdentifiers). + // Mid-path `*`: zero-or-more semantics. Try every offset + // including i == 0 (wildcard consumed zero segments). No + // optimistic peek at dynamic[1]: that optimization used to + // require regular[i] to literally equal dynamic[1], which is + // wrong whenever dynamic[1] is itself another `*` (consecutive + // wildcards like `/*/*` would never recurse and thus never + // match — user-authored profiles can contain literal /*/* + // patterns even though analyzer-generated ones are squashed by + // collapseAdjacentDynamicIdentifiers). for i := 0; i <= len(regular); i++ { if compareSegments(dynamic[1:], regular[i:]) { return true diff --git a/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go b/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go index 09dbdf56a..d1a8f2a24 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go @@ -72,14 +72,44 @@ func BenchmarkAnalyzeOpensVsDeflateStringer(b *testing.B) { }) } -// func BenchmarkCompareDynamic(b *testing.B) { -// dynamicPath := "/api/\u22ef/\u22ef" -// regularPath := "/api/users/123" -// for i := 0; i < b.N; i++ { -// _ = dynamicpathdetector.CompareDynamic(dynamicPath, regularPath) -// } -// b.ReportAllocs() -// } +// BenchmarkCompareDynamic exercises the matcher under realistic shapes. +// Run with -benchmem; the goal is the zero-alloc target Matthias called +// out on upstream PR #316. Until the perf rewrite lands the numbers +// document the current cost so regressions show up. Inputs are split +// across cases so the benchmark covers: +// +// - DynamicIdentifier-only paths (auto-generated short patterns) +// - DynamicIdentifier-only paths (deeper, more typical R0002 traffic) +// - WildcardIdentifier with mid-path zero-or-more semantics +// - Trailing WildcardIdentifier (the regression-prone path) +// - Anchored / unanchored boundary cases (the new `*` vs `/*` contract) +func BenchmarkCompareDynamic(b *testing.B) { + cases := []struct { + name string + dynamic string + regular string + }{ + {"ellipsis_short", "/api/\u22ef/\u22ef", "/api/users/123"}, + {"ellipsis_deep", "/api/\u22ef/\u22ef/\u22ef/\u22ef", "/api/users/123/posts/42"}, + {"trailing_star", "/etc/*", "/etc/ssh/sshd_config"}, + {"trailing_star_no_match_on_parent", "/etc/*", "/etc"}, + {"mid_star_zero_consumed", "/a/*/b", "/a/b"}, + {"mid_star_many_consumed", "/a/*/b", "/a/x/y/z/b"}, + {"anchored_root_no_match", "/*", "/"}, + {"unanchored_star_root", "*", "/"}, + {"deep_literal_match", "/var/log/syslog", "/var/log/syslog"}, + {"deep_literal_mismatch", "/var/log/syslog", "/var/log/messages"}, + } + for _, c := range cases { + b.Run(c.name, func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = dynamicpathdetector.CompareDynamic(c.dynamic, c.regular) + } + }) + } +} func generateMixedPaths(count int, fixedLength int) []string { paths := make([]string, count) diff --git a/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go b/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go index e6b6ab057..3b0956572 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/coverage_test.go @@ -6,6 +6,7 @@ import ( "github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // configThreshold returns the collapse threshold for the given path prefix @@ -418,3 +419,229 @@ func TestCompareDynamic_WildcardRegressions(t *testing.T) { }) } } + +// TestCompareDynamic_AnchoringAndTrailing pins the trailing-`*` / +// anchoring contract reported on upstream PR #316. +// +// The first wildcard-aware implementation made a trailing `*` match +// zero-or-more remaining segments, so `/etc/*` silently matched the +// bare `/etc` directory — widening R0002's blind spot to cover the +// profiled directory's parent. Standard shell glob requires trailing +// `*` to consume one or more segments. Anchoring rules: +// +// - Anchored: leading `/` makes `/*` "any path strictly under /" +// and explicitly excludes the bare `/` directory. +// - Unanchored: a bare `*` (no leading slash) is the only way to +// allowlist the root path itself. +// +// Trailing slashes on the regular path are normalized away so +// `/etc/passwd/` is treated as `/etc/passwd`. +func TestCompareDynamic_AnchoringAndTrailing(t *testing.T) { + tests := []struct { + name string + dynamic string + regular string + want bool + }{ + // Root-only cases — anchored vs unanchored distinction. + {"anchored_star_does_not_match_root", "/*", "/", false}, + {"anchored_star_does_not_match_empty", "/*", "", false}, + {"anchored_star_matches_top_level_child", "/*", "/foo", true}, + {"anchored_star_matches_deeper_child", "/*", "/foo/bar", true}, + {"unanchored_star_matches_root", "*", "/", true}, + {"unanchored_star_matches_top_level_child", "*", "/foo", true}, + {"unanchored_star_does_not_match_empty", "*", "", false}, + + // Bare-parent boundary — the original /etc/* regression. + {"trailing_star_does_not_match_bare_parent", "/etc/*", "/etc", false}, + {"trailing_star_does_not_match_parent_with_slash", "/etc/*", "/etc/", false}, + {"trailing_star_matches_immediate_child", "/etc/*", "/etc/passwd", true}, + {"trailing_star_matches_deep_child", "/etc/*", "/etc/ssh/sshd_config", true}, + {"trailing_star_matches_child_with_trailing_slash", "/etc/*", "/etc/passwd/", true}, + {"deep_trailing_star_does_not_match_short_path", "/var/log/*", "/var/log", false}, + {"deep_trailing_star_does_not_match_grandparent", "/var/log/*", "/var", false}, + {"deep_trailing_star_matches_child", "/var/log/*", "/var/log/syslog", true}, + + // Multiple trailing wildcards — zero-or-more mid-* + one-or-more + // final-* together. The mid-* may consume zero, so /etc/*/* still + // matches /etc/ssh (one segment) by having the inner * consume 0 + // and the trailing * consume the segment. + {"double_trailing_does_not_match_parent", "/etc/*/*", "/etc", false}, + {"double_trailing_does_not_match_parent_slash", "/etc/*/*", "/etc/", false}, + {"double_trailing_matches_one_child", "/etc/*/*", "/etc/ssh", true}, + {"double_trailing_matches_two_children", "/etc/*/*", "/etc/ssh/sshd_config", true}, + {"double_trailing_matches_deep", "/etc/*/*", "/etc/ssh/dir/file", true}, + + // Empty / bare-pattern edges. + {"empty_dynamic_does_not_match_path", "", "/foo", false}, + {"empty_dynamic_does_not_match_root", "", "/", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := dynamicpathdetector.CompareDynamic(tt.dynamic, tt.regular) + assert.Equal(t, tt.want, got, + "CompareDynamic(%q, %q) = %v, want %v", tt.dynamic, tt.regular, got, tt.want) + }) + } +} + +// TestCompareDynamic_EllipsisAndStar pins the interaction between the +// two wildcard kinds: +// +// - DynamicIdentifier (⋯) consumes EXACTLY ONE segment. +// - WildcardIdentifier (*) consumes ZERO-OR-MORE segments mid-path +// and ONE-OR-MORE segments when trailing. +// +// Mixing them (e.g. `/⋯/*`) is the analyzer's normal output for a +// fully-collapsed grandchild branch: ⋯ pins the immediate child to +// "any single segment" and * accepts the deeper tail. +func TestCompareDynamic_EllipsisAndStar(t *testing.T) { + tests := []struct { + name string + dynamic string + regular string + want bool + }{ + // ⋯ alone: exactly one segment. + {"ellipsis_matches_exactly_one", "/⋯/foo", "/x/foo", true}, + {"ellipsis_does_not_consume_zero", "/⋯/foo", "/foo", false}, + {"ellipsis_does_not_consume_two", "/⋯/foo", "/x/y/foo", false}, + + // ⋯ then trailing *: ⋯ consumes 1, * needs ≥1 more. + {"ellipsis_then_trailing_star_two_segments", "/⋯/*", "/x/y", true}, + {"ellipsis_then_trailing_star_three_segments", "/⋯/*", "/x/y/z", true}, + {"ellipsis_then_trailing_star_one_segment_fails", "/⋯/*", "/x", false}, + {"ellipsis_then_trailing_star_root_fails", "/⋯/*", "/", false}, + + // Mid-* before ⋯: * may consume zero, ⋯ still needs exactly one. + {"star_then_ellipsis_two_segments", "/*/⋯", "/a/b", true}, + {"star_consumed_zero_then_ellipsis_matches_one", "/*/⋯", "/b", true}, + {"star_then_ellipsis_one_segment_fails_when_zero_consumed", "/*/⋯", "/", false}, + + // Nested ⋯. + {"nested_ellipsis_matches_two", "/⋯/⋯/foo", "/x/y/foo", true}, + {"nested_ellipsis_does_not_match_one", "/⋯/⋯/foo", "/x/foo", false}, + + // * literal * pattern around a static segment. + {"star_literal_star_matches", "/*/etc/*", "/foo/etc/passwd", true}, + {"star_literal_star_no_trailing_segment_fails", "/*/etc/*", "/foo/etc", false}, + {"star_literal_star_no_leading_consumed_zero_matches", "/*/etc/*", "/etc/passwd", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := dynamicpathdetector.CompareDynamic(tt.dynamic, tt.regular) + assert.Equal(t, tt.want, got, + "CompareDynamic(%q, %q) = %v, want %v", tt.dynamic, tt.regular, got, tt.want) + }) + } +} + +// TestCompareDynamic_MidPathStarZeroOrMore explicitly pins the +// zero-or-more semantics for `*` when it appears mid-path. This is +// distinct from trailing `*` (which is one-or-more) and is what allows +// auto-generated patterns like `/foo/*/bar` to also match `/foo/bar` +// (the wildcard consumed nothing, then the literal segment matched). +func TestCompareDynamic_MidPathStarZeroOrMore(t *testing.T) { + tests := []struct { + name string + dynamic string + regular string + want bool + }{ + {"mid_star_consumes_zero", "/a/*/b", "/a/b", true}, + {"mid_star_consumes_one", "/a/*/b", "/a/x/b", true}, + {"mid_star_consumes_many", "/a/*/b", "/a/x/y/b", true}, + {"mid_star_literal_after_mismatches", "/a/*/b", "/a/x/c", false}, + {"mid_star_literal_prefix_mismatch", "/a/*/b", "/z/x/b", false}, + + // Consecutive mid-stars — both can independently consume zero. + {"consecutive_mid_star_both_zero", "/a/*/*/b", "/a/b", true}, + {"consecutive_mid_star_one_zero_one_one", "/a/*/*/b", "/a/x/b", true}, + {"consecutive_mid_star_both_one", "/a/*/*/b", "/a/x/y/b", true}, + {"consecutive_mid_star_deeper", "/a/*/*/b", "/a/x/y/z/b", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := dynamicpathdetector.CompareDynamic(tt.dynamic, tt.regular) + assert.Equal(t, tt.want, got, + "CompareDynamic(%q, %q) = %v, want %v", tt.dynamic, tt.regular, got, tt.want) + }) + } +} + +// TestDefaultCollapseConfigs_DefensiveCopy pins the contract that the +// public DefaultCollapseConfigs() accessor returns a fresh slice on +// every call, so callers cannot accidentally mutate the package-level +// state. Without this guard, a downstream consumer modifying the +// returned slice (sorting, appending, swapping prefixes) would silently +// affect every subsequent call, including AnalyzeOpens in the storage +// deflate path. The unexported var is the canonical source of truth. +func TestDefaultCollapseConfigs_DefensiveCopy(t *testing.T) { + first := dynamicpathdetector.DefaultCollapseConfigs() + require.NotEmpty(t, first, "default configs must not be empty") + + // Mutate the returned slice in two ways: change a Threshold and + // append a junk config. Neither should leak to the next call. + first[0].Threshold = 999_999 + first = append(first, dynamicpathdetector.CollapseConfig{ + Prefix: "/poisoned", Threshold: 1, + }) + + second := dynamicpathdetector.DefaultCollapseConfigs() + assert.NotEqual(t, 999_999, second[0].Threshold, + "mutating the first call's slice must not change the package state") + for _, cfg := range second { + assert.NotEqual(t, "/poisoned", cfg.Prefix, + "appending to the first call's slice must not leak into the second call") + } + + // Also assert the two slices are distinct backing arrays — without + // this, len(second) would happen to be safe but a future caller + // reading first[len-1] could observe the appended element. + if len(first) > 0 && len(second) > 0 { + // Address-of-element comparison is sufficient; if the underlying + // array is shared, &first[0] == &second[0]. + assert.NotSame(t, &first[0], &second[0], + "DefaultCollapseConfigs must return a fresh backing array") + } +} + +// TestCompareDynamic_PathSeparatorEdges documents how `/`-related +// edges are normalized: trailing slashes are insignificant, the +// regular path `""` is treated as no-path (matches nothing), and the +// internal split-and-trim normalization is exercised on both sides. +func TestCompareDynamic_PathSeparatorEdges(t *testing.T) { + tests := []struct { + name string + dynamic string + regular string + want bool + }{ + // Trailing slash on regular — should match same as without. + {"trailing_slash_on_regular_matches_literal", "/etc/passwd", "/etc/passwd/", true}, + {"trailing_slash_on_regular_for_directory_match", "/etc", "/etc/", true}, + + // Trailing slash on dynamic — should match same as without. + {"trailing_slash_on_dynamic_literal", "/etc/passwd/", "/etc/passwd", true}, + {"trailing_slash_on_dynamic_with_star", "/etc/*/", "/etc/passwd", true}, + + // Empty regular path — matches nothing, including the bare star. + {"empty_regular_does_not_match_anchored", "/foo", "", false}, + {"empty_regular_does_not_match_unanchored_literal", "foo", "", false}, + {"empty_regular_does_not_match_star", "*", "", false}, + + // Empty dynamic — matches nothing. + {"empty_dynamic_does_not_match_anything", "", "/foo", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := dynamicpathdetector.CompareDynamic(tt.dynamic, tt.regular) + assert.Equal(t, tt.want, got, + "CompareDynamic(%q, %q) = %v, want %v", tt.dynamic, tt.regular, got, tt.want) + }) + } +} diff --git a/pkg/registry/file/dynamicpathdetector/tests/profile_test.go b/pkg/registry/file/dynamicpathdetector/tests/profile_test.go index d9db951c8..b70adb5ee 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/profile_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/profile_test.go @@ -20,7 +20,6 @@ package dynamicpathdetectortests import ( "flag" - "fmt" "os" "path/filepath" "runtime" @@ -49,7 +48,10 @@ func TestProfileAnalyzePath(t *testing.T) { // Generate a representative mixed workload once, outside the measured // section, so its allocations don't show up in the profile. paths := generateMixedPaths(10000, 0) - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs( + dynamicpathdetector.OpenDynamicThreshold, + dynamicpathdetector.DefaultCollapseConfigs(), + ) identifier := "profile" // Warm up the analyzer so the trie is populated. Steady-state calls @@ -80,7 +82,9 @@ func TestProfileAnalyzePath(t *testing.T) { for i := 0; i < *profileIters; i++ { if _, err := analyzer.AnalyzePath(paths[i%len(paths)], identifier); err != nil { pprof.StopCPUProfile() - cpuFile.Close() + if cerr := cpuFile.Close(); cerr != nil { + t.Logf("close cpu profile after error: %v", cerr) + } t.Fatalf("AnalyzePath iter %d: %v", i, err) } } @@ -94,7 +98,9 @@ func TestProfileAnalyzePath(t *testing.T) { runtime.ReadMemStats(&after) pprof.StopCPUProfile() - cpuFile.Close() + if err := cpuFile.Close(); err != nil { + t.Fatalf("close cpu profile: %v", err) + } // Heap profile (alloc_space). memPath := filepath.Join(*profileOutDir, "mem.out") @@ -105,7 +111,9 @@ func TestProfileAnalyzePath(t *testing.T) { if err := pprof.Lookup("allocs").WriteTo(memFile, 0); err != nil { t.Fatalf("write mem profile: %v", err) } - memFile.Close() + if err := memFile.Close(); err != nil { + t.Fatalf("close mem profile: %v", err) + } // Goroutine snapshot (useful when debugging leaks). goPath := filepath.Join(*profileOutDir, "goroutine.out") @@ -116,7 +124,9 @@ func TestProfileAnalyzePath(t *testing.T) { if err := pprof.Lookup("goroutine").WriteTo(goFile, 0); err != nil { t.Fatalf("write goroutine profile: %v", err) } - goFile.Close() + if err := goFile.Close(); err != nil { + t.Fatalf("close goroutine profile: %v", err) + } totalBytes := after.TotalAlloc - before.TotalAlloc totalMallocs := after.Mallocs - before.Mallocs @@ -135,7 +145,10 @@ func TestProfileAnalyzePath(t *testing.T) { // new SegmentNode. func BenchmarkAnalyzePathWarm(b *testing.B) { paths := generateMixedPaths(10000, 0) - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs( + dynamicpathdetector.OpenDynamicThreshold, + dynamicpathdetector.DefaultCollapseConfigs(), + ) identifier := "warm" for _, p := range paths { if _, err := analyzer.AnalyzePath(p, identifier); err != nil { @@ -162,12 +175,12 @@ func BenchmarkAnalyzePathCold(b *testing.B) { b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - analyzer := dynamicpathdetector.NewPathAnalyzer(100) + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs( + dynamicpathdetector.OpenDynamicThreshold, + dynamicpathdetector.DefaultCollapseConfigs(), + ) if _, err := analyzer.AnalyzePath(paths[i%len(paths)], identifier); err != nil { b.Fatalf("AnalyzePath: %v", err) } } } - -// Ensure fmt is kept imported when future debugging prints land here. -var _ = fmt.Sprint diff --git a/pkg/registry/file/dynamicpathdetector/types.go b/pkg/registry/file/dynamicpathdetector/types.go index b581dee90..251c77f29 100644 --- a/pkg/registry/file/dynamicpathdetector/types.go +++ b/pkg/registry/file/dynamicpathdetector/types.go @@ -26,16 +26,24 @@ type CollapseConfig struct { Threshold int } -// DefaultCollapseConfigs carries the per-prefix thresholds we've found +// defaultCollapseConfigs carries the per-prefix thresholds we've found // useful in practice. These are the defaults wired into AnalyzeOpens; a // caller can pass a different slice via NewPathAnalyzerWithConfigs if -// they want to tune for their workload. -var DefaultCollapseConfigs = []CollapseConfig{ +// they want to tune for their workload. Unexported so callers cannot +// mutate the package-level slice — access via DefaultCollapseConfigs(). +var defaultCollapseConfigs = []CollapseConfig{ {Prefix: "/etc", Threshold: 100}, {Prefix: "/etc/apache2", Threshold: 50}, // tuned for the webapp standard test {Prefix: "/opt", Threshold: 50}, {Prefix: "/var/run", Threshold: 50}, - {Prefix: "/app", Threshold: 50}, // any variation under /app collapses immediately + {Prefix: "/app", Threshold: 50}, // any variation under /app collapses at 50 unique children +} + +// DefaultCollapseConfigs returns a defensive copy of the package-level +// default per-prefix collapse thresholds. Callers that mutate the result +// will not affect the package state or other callers. +func DefaultCollapseConfigs() []CollapseConfig { + return append([]CollapseConfig(nil), defaultCollapseConfigs...) } // DefaultCollapseConfig is the global fallback used when no CollapseConfig From 07dcd1bf7fd2d3e0ea6b47d7215b4da46f96239d Mon Sep 17 00:00:00 2001 From: Entlein Date: Fri, 1 May 2026 18:55:45 +0200 Subject: [PATCH 2/2] fix(analyzer): FindConfigForPath returns CollapseConfig by value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address CodeRabbit's outbound-mutation footgun: the public lookup returned *CollapseConfig pointing into ua.configs / ua.defaultCfg, so 'cfg := analyzer.FindConfigForPath(p); cfg.Threshold = 1' silently mutated the analyzer's threshold map for every future call. That defeated the defensive inbound copy NewPathAnalyzerWithConfigs already makes. Switch to value return. The fall-through to defaultCfg is now also a value copy, so callers cannot poison the analyzer through that path either. Doc updated — the old 'or nil if none match' wording was wrong; the function never returned nil, it always falls back to defaultCfg. TestFindConfigForPath_DefensiveCopy pins the contract: mutating the returned config (Threshold and Prefix) does not affect a subsequent call, exercised on both the per-prefix and default-fallback paths. TestFindConfigForPath drops the dead 'NotNil' assertion (a value can't be nil); field reads work unchanged on both pointer and value. --- .../file/dynamicpathdetector/analyzer.go | 30 +++++++------- .../tests/analyze_opens_test.go | 39 ++++++++++++++++++- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/pkg/registry/file/dynamicpathdetector/analyzer.go b/pkg/registry/file/dynamicpathdetector/analyzer.go index 74e31aca4..b4562ef76 100644 --- a/pkg/registry/file/dynamicpathdetector/analyzer.go +++ b/pkg/registry/file/dynamicpathdetector/analyzer.go @@ -432,27 +432,31 @@ func compareSegments(dynamic, regular []string) bool { return false } -// FindConfigForPath returns the CollapseConfig whose Prefix matches -// `path` with the longest match, or nil if none match. Exposed so -// callers and tests can introspect which threshold will apply to a -// given path without walking the trie. -func (ua *PathAnalyzer) FindConfigForPath(path string) *CollapseConfig { - var best *CollapseConfig +// FindConfigForPath returns a value copy of the CollapseConfig whose +// Prefix matches `path` with the longest match. Falls back to the +// analyzer's default config (Prefix:"/") when no per-prefix override +// applies, so the result is always meaningful — there is no "no match" +// signal. +// +// Returning by value keeps the analyzer's internal state immutable +// from callers. NewPathAnalyzerWithConfigs already makes a defensive +// inbound copy of `configs`; this is its outbound twin. Without it, +// `cfg := analyzer.FindConfigForPath(p); cfg.Threshold = 1` would +// silently mutate the analyzer's threshold map for every future call. +func (ua *PathAnalyzer) FindConfigForPath(path string) CollapseConfig { + bestIdx := -1 bestLen := -1 for i := range ua.configs { cfg := &ua.configs[i] if hasPrefixAtBoundary(path, cfg.Prefix) && len(cfg.Prefix) > bestLen { - best = cfg + bestIdx = i bestLen = len(cfg.Prefix) } } - // Fall back to the `/` default config if no per-prefix override - // matched — callers expect a non-nil result when *any* threshold - // applies, and the default always applies. - if best == nil { - return &ua.defaultCfg + if bestIdx == -1 { + return ua.defaultCfg } - return best + return ua.configs[bestIdx] } // CollapseAdjacentDynamicIdentifiers replaces runs of adjacent diff --git a/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go b/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go index d2a83b11f..2c55e7d4d 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/analyze_opens_test.go @@ -924,7 +924,6 @@ func TestFindConfigForPath(t *testing.T) { for _, tt := range tests { t.Run(tt.path, func(t *testing.T) { config := analyzer.FindConfigForPath(tt.path) - assert.NotNil(t, config, "config should not be nil for path %q", tt.path) assert.Equal(t, tt.expectedPrefix, config.Prefix, "path %q should match prefix %q", tt.path, tt.expectedPrefix) assert.Equal(t, tt.expectedThreshold, config.Threshold, @@ -932,3 +931,41 @@ func TestFindConfigForPath(t *testing.T) { }) } } + +// TestFindConfigForPath_DefensiveCopy pins the contract that the +// returned CollapseConfig is a value copy, so mutating it does NOT +// alter the analyzer's internal state. Same defensive philosophy as +// DefaultCollapseConfigs(): the caller cannot accidentally turn an +// introspection helper into a hidden mutator. +func TestFindConfigForPath_DefensiveCopy(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs( + dynamicpathdetector.OpenDynamicThreshold, + dynamicpathdetector.DefaultCollapseConfigs(), + ) + + // Per-prefix override path: snapshot, mutate the copy, look up + // again, expect the analyzer to be unaffected. + first := analyzer.FindConfigForPath("/etc/file") + originalThreshold := first.Threshold + first.Threshold = 999_999 + first.Prefix = "/poisoned" + + second := analyzer.FindConfigForPath("/etc/file") + assert.Equal(t, originalThreshold, second.Threshold, + "mutating the first call's CollapseConfig must not change the analyzer state") + assert.NotEqual(t, "/poisoned", second.Prefix, + "mutating the first call's Prefix must not leak into the second call") + + // Default-fallback path: same contract for the path that doesn't + // match any per-prefix override. + firstDefault := analyzer.FindConfigForPath("/no/match/anywhere") + originalDefaultThreshold := firstDefault.Threshold + firstDefault.Threshold = 12345 + firstDefault.Prefix = "/poisoned-default" + + secondDefault := analyzer.FindConfigForPath("/no/match/anywhere") + assert.Equal(t, originalDefaultThreshold, secondDefault.Threshold, + "mutating the default config copy must not change the analyzer state") + assert.NotEqual(t, "/poisoned-default", secondDefault.Prefix, + "mutating the default config Prefix must not leak into a future call") +}