Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/registry/file/applicationprofile_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (a *ApplicationProfileProcessor) SetStorage(containerProfileStorage Contain
}

func deflateApplicationProfileContainer(container softwarecomposition.ApplicationProfileContainer, sbomSet mapset.Set[string]) softwarecomposition.ApplicationProfileContainer {
opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs), sbomSet)
opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs()), sbomSet)
if err != nil {
logger.L().Debug("falling back to DeflateStringer for opens", loggerhelpers.Error(err))
opens = DeflateStringer(container.Opens)
Expand Down
12 changes: 10 additions & 2 deletions pkg/registry/file/applicationprofile_processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,16 @@ func TestDeflateApplicationProfileContainer_MixedPathsCollapse(t *testing.T) {
})
}

// /etc uses the /etc config threshold from DefaultCollapseConfigs (100)
etcThreshold := 100
// /etc uses the /etc config threshold from DefaultCollapseConfigs.
// Derive from the live config so this test stays in sync if the
// production threshold for /etc ever changes — hardcoding 100 here
// previously meant the test would silently pass even when
// DefaultCollapseConfigs drifted (CodeRabbit C5).
etcAnalyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(
dynamicpathdetector.OpenDynamicThreshold,
dynamicpathdetector.DefaultCollapseConfigs(),
)
etcThreshold := etcAnalyzer.FindConfigForPath("/etc/file").Threshold
for i := 0; i < etcThreshold+1; i++ {
opens = append(opens, softwarecomposition.OpenCalls{
Path: fmt.Sprintf("/etc/conf%d.cfg", i),
Expand Down
2 changes: 1 addition & 1 deletion pkg/registry/file/containerprofile_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ func (a *ContainerProfileProcessor) getAggregatedData(ctx context.Context, key s
}

func DeflateContainerProfileSpec(container softwarecomposition.ContainerProfileSpec, sbomSet mapset.Set[string]) softwarecomposition.ContainerProfileSpec {
opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs), sbomSet)
opens, err := dynamicpathdetector.AnalyzeOpens(container.Opens, dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.OpenDynamicThreshold, dynamicpathdetector.DefaultCollapseConfigs()), sbomSet)
if err != nil {
logger.L().Debug("ContainerProfileProcessor.deflateContainerProfileSpec - falling back to DeflateStringer for opens", loggerhelpers.Error(err))
opens = DeflateStringer(container.Opens)
Expand Down
29 changes: 27 additions & 2 deletions pkg/registry/file/dynamicpathdetector/analyze_endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"strings"

mapset "github.com/deckarep/golang-set/v2"
"github.com/kubescape/go-logger"
loggerhelpers "github.com/kubescape/go-logger/helpers"
types "github.com/kubescape/storage/pkg/apis/softwarecomposition"
)

Expand Down Expand Up @@ -99,8 +101,26 @@ func AnalyzeURL(urlString string, analyzer *PathAnalyzer) (string, error) {
return ":" + port + path, nil
}

// splitEndpointPortAndPath splits the canonical `:<port><path>` form
// produced by AnalyzeURL into its (port, path) parts.
//
// Defensive contract: AnalyzeURL guarantees a leading `:` and a port
// segment, but callers and tests sometimes pass bare paths (e.g.
// "/health") for ad-hoc lookups. To keep merge keys deterministic,
// this helper returns empty port + leading-slash-normalised path for
// any input that does not start with `:`. The empty string returns
// ("", "/") to match the original fall-through behavior.
func splitEndpointPortAndPath(endpoint string) (string, string) {
s := strings.TrimPrefix(endpoint, ":")
if !strings.HasPrefix(endpoint, ":") {
if endpoint == "" {
return "", "/"
}
if !strings.HasPrefix(endpoint, "/") {
endpoint = "/" + endpoint
}
return "", endpoint
}
s := endpoint[1:]
idx := strings.Index(s, "/")
if idx == -1 {
return s, "/"
Expand Down Expand Up @@ -220,7 +240,12 @@ func mergeHeaders(existing, new *types.HTTPEndpoint) {

rawJSON, err := json.Marshal(existingHeaders)
if err != nil {
fmt.Println("Error marshaling JSON:", err)
// Don't pollute stdout from a library function. The caller has
// no signal-back path here (mergeHeaders is a void helper) so
// log at Debug and bail — leaving Headers untouched is the
// safer choice than corrupting them with a partial marshal.
logger.L().Debug("mergeHeaders: failed to marshal merged headers, leaving existing untouched",
loggerhelpers.Error(err))
return
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package dynamicpathdetector

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestSplitEndpointPortAndPath_DefensiveContract pins the inputs that
// AnalyzeURL is supposed to produce (`:<port><path>`) AND the defensive
// behavior for bare-path / empty / no-leading-slash inputs that may
// arrive via ad-hoc lookups or tests. Without the guard, "foo" was
// returned as ("foo", "/") — silently treating an opaque token as a
// port number. Flagged on upstream PR #316 by CodeRabbit (C7).
func TestSplitEndpointPortAndPath_DefensiveContract(t *testing.T) {
tests := []struct {
name string
input string
wantPort string
wantPath string
}{
// Canonical AnalyzeURL output.
{"empty", "", "", "/"},
{"port_only", ":80", "80", "/"},
{"port_with_root_path", ":80/", "80", "/"},
{"port_with_path", ":80/health", "80", "/health"},
{"wildcard_port", ":0", "0", "/"},
{"wildcard_port_with_path", ":0/api/users", "0", "/api/users"},
{"port_with_deep_path", ":443/v1/items/42", "443", "/v1/items/42"},

// Defensive — bare paths arriving without the `:` prefix.
{"bare_path", "/health", "", "/health"},
{"bare_root", "/", "", "/"},
{"bare_deep_path", "/v1/items/42", "", "/v1/items/42"},

// Defensive — opaque token without a leading slash. Previous
// behavior silently returned ("foo", "/") which would be
// indistinguishable from port="foo". The guard normalises this
// to ("", "/foo").
{"opaque_token", "foo", "", "/foo"},
{"opaque_with_dot", "host.example.com", "", "/host.example.com"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotPort, gotPath := splitEndpointPortAndPath(tt.input)
assert.Equal(t, tt.wantPort, gotPort,
"splitEndpointPortAndPath(%q) port = %q, want %q",
tt.input, gotPort, tt.wantPort)
assert.Equal(t, tt.wantPath, gotPath,
"splitEndpointPortAndPath(%q) path = %q, want %q",
tt.input, gotPath, tt.wantPath)
})
}
}
111 changes: 76 additions & 35 deletions pkg/registry/file/dynamicpathdetector/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,38 +347,75 @@ func shallowChildrenCopy(src, dst *SegmentNode) {
}
}

// CompareDynamic checks whether `regularPath` is matched by `dynamicPath`,
// where `dynamicPath` may contain DynamicIdentifier (⋯, single-segment
// wildcard) or WildcardIdentifier (*, zero-or-more-segment wildcard).
// The previous implementation only handled DynamicIdentifier, causing
// explicit `/etc/*` profile entries to never match at runtime — the
// node-agent R0002 rule (Files Access Anomalies) uses this to decide
// whether a file access is in-profile.
// CompareDynamic checks whether `regularPath` is matched by `dynamicPath`.
// The dynamic path may contain DynamicIdentifier (⋯, exactly-one-segment
// wildcard) or WildcardIdentifier (*, zero-or-more-segment mid-path /
// one-or-more-segment trailing wildcard). The node-agent R0002 rule
// (Files Access Anomalies) uses this at every file-open to decide whether
// the access is in-profile.
//
// Anchoring contract:
// - Anchored patterns (start with `/`): `/etc/*` matches files UNDER
// /etc but NOT the bare `/etc` directory itself, mirroring shell
// glob semantics. This avoids R0002 silently allowing access to a
// profiled directory's parent.
// - Unanchored `*` (no leading slash): explicit catch-all that also
// matches the root path `/`. The only way to whitelist `/` itself
// is an explicit unanchored `*`.
//
// Trailing-slash insensitivity: `/etc/` is treated as `/etc`, and
// `/etc/passwd/` as `/etc/passwd`. Trailing empty path components from
// `strings.Split` are trimmed so `len(regular) > 0` correctly reflects
// the presence of a real path tail when matching trailing `*`.
//
// The empty regular path (`""`) is treated as "no path" and matches
// nothing — distinct from the root path `/`, which matches unanchored
// `*` per the contract above.
func CompareDynamic(dynamicPath, regularPath string) bool {
dynamicSegments := strings.Split(dynamicPath, "/")
regularSegments := strings.Split(regularPath, "/")
return compareSegments(dynamicSegments, regularSegments)
// Empty inputs match nothing. Note that splitPath("") and splitPath("/")
// both yield [""] after trim, so without this guard an empty profile
// entry would silently match the root path.
if dynamicPath == "" || regularPath == "" {
return false
}
return compareSegments(splitPath(dynamicPath), splitPath(regularPath))
}

// splitPath splits a path on `/` and trims trailing empty segments
// produced by trailing slashes (e.g. `/etc/` -> ["", "etc"] not
// ["", "etc", ""]). The leading empty segment from a leading slash is
// preserved as the anchor marker. Single-element results are not
// trimmed so the root path `/` retains its `[""]` shape.
func splitPath(p string) []string {
s := strings.Split(p, "/")
for len(s) > 1 && s[len(s)-1] == "" {
s = s[:len(s)-1]
}
return s
}

func compareSegments(dynamic, regular []string) bool {
if len(dynamic) == 0 {
return len(regular) == 0
}
if dynamic[0] == WildcardIdentifier {
// A trailing `*` matches any remaining path tail (including empty).
// Trailing `*` matches one OR MORE remaining segments — never
// zero. This is what makes `/etc/*` not match the bare `/etc`
// directory, while still matching `/etc/passwd` and any deeper
// path. The unanchored-`*` case (regular path is `/`, regular
// slice is [""]) returns true because len(regular) == 1.
if len(dynamic) == 1 {
return true
return len(regular) > 0
}
// Try to match the rest of the dynamic pattern starting at every
// position in the regular tail — including i == 0 (the wildcard
// consumed zero segments) and every later offset (wildcard
// consumed i segments). No optimistic peek at dynamic[1]: that
// optimization used to require regular[i] to literally equal
// dynamic[1], which is wrong whenever dynamic[1] is itself
// another `*` (consecutive wildcards like `/*/*` would never
// recurse and thus never match — user-authored profiles can
// contain literal /*/* patterns even though analyzer-generated
// ones are squashed by collapseAdjacentDynamicIdentifiers).
// Mid-path `*`: zero-or-more semantics. Try every offset
// including i == 0 (wildcard consumed zero segments). No
// optimistic peek at dynamic[1]: that optimization used to
// require regular[i] to literally equal dynamic[1], which is
// wrong whenever dynamic[1] is itself another `*` (consecutive
// wildcards like `/*/*` would never recurse and thus never
// match — user-authored profiles can contain literal /*/*
// patterns even though analyzer-generated ones are squashed by
// collapseAdjacentDynamicIdentifiers).
for i := 0; i <= len(regular); i++ {
if compareSegments(dynamic[1:], regular[i:]) {
return true
Expand All @@ -395,27 +432,31 @@ func compareSegments(dynamic, regular []string) bool {
return false
}

// FindConfigForPath returns the CollapseConfig whose Prefix matches
// `path` with the longest match, or nil if none match. Exposed so
// callers and tests can introspect which threshold will apply to a
// given path without walking the trie.
func (ua *PathAnalyzer) FindConfigForPath(path string) *CollapseConfig {
var best *CollapseConfig
// FindConfigForPath returns a value copy of the CollapseConfig whose
// Prefix matches `path` with the longest match. Falls back to the
// analyzer's default config (Prefix:"/") when no per-prefix override
// applies, so the result is always meaningful — there is no "no match"
// signal.
//
// Returning by value keeps the analyzer's internal state immutable
// from callers. NewPathAnalyzerWithConfigs already makes a defensive
// inbound copy of `configs`; this is its outbound twin. Without it,
// `cfg := analyzer.FindConfigForPath(p); cfg.Threshold = 1` would
// silently mutate the analyzer's threshold map for every future call.
func (ua *PathAnalyzer) FindConfigForPath(path string) CollapseConfig {
bestIdx := -1
bestLen := -1
for i := range ua.configs {
cfg := &ua.configs[i]
if hasPrefixAtBoundary(path, cfg.Prefix) && len(cfg.Prefix) > bestLen {
best = cfg
bestIdx = i
bestLen = len(cfg.Prefix)
}
}
// Fall back to the `/` default config if no per-prefix override
// matched — callers expect a non-nil result when *any* threshold
// applies, and the default always applies.
if best == nil {
return &ua.defaultCfg
if bestIdx == -1 {
return ua.defaultCfg
}
return best
return ua.configs[bestIdx]
}

// CollapseAdjacentDynamicIdentifiers replaces runs of adjacent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -924,11 +924,48 @@ func TestFindConfigForPath(t *testing.T) {
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
config := analyzer.FindConfigForPath(tt.path)
assert.NotNil(t, config, "config should not be nil for path %q", tt.path)
assert.Equal(t, tt.expectedPrefix, config.Prefix,
"path %q should match prefix %q", tt.path, tt.expectedPrefix)
assert.Equal(t, tt.expectedThreshold, config.Threshold,
"path %q should have threshold %d", tt.path, tt.expectedThreshold)
})
}
}

// TestFindConfigForPath_DefensiveCopy pins the contract that the
// returned CollapseConfig is a value copy, so mutating it does NOT
// alter the analyzer's internal state. Same defensive philosophy as
// DefaultCollapseConfigs(): the caller cannot accidentally turn an
// introspection helper into a hidden mutator.
func TestFindConfigForPath_DefensiveCopy(t *testing.T) {
analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(
dynamicpathdetector.OpenDynamicThreshold,
dynamicpathdetector.DefaultCollapseConfigs(),
)

// Per-prefix override path: snapshot, mutate the copy, look up
// again, expect the analyzer to be unaffected.
first := analyzer.FindConfigForPath("/etc/file")
originalThreshold := first.Threshold
first.Threshold = 999_999
first.Prefix = "/poisoned"

second := analyzer.FindConfigForPath("/etc/file")
assert.Equal(t, originalThreshold, second.Threshold,
"mutating the first call's CollapseConfig must not change the analyzer state")
assert.NotEqual(t, "/poisoned", second.Prefix,
"mutating the first call's Prefix must not leak into the second call")

// Default-fallback path: same contract for the path that doesn't
// match any per-prefix override.
firstDefault := analyzer.FindConfigForPath("/no/match/anywhere")
originalDefaultThreshold := firstDefault.Threshold
firstDefault.Threshold = 12345
firstDefault.Prefix = "/poisoned-default"

secondDefault := analyzer.FindConfigForPath("/no/match/anywhere")
assert.Equal(t, originalDefaultThreshold, secondDefault.Threshold,
"mutating the default config copy must not change the analyzer state")
assert.NotEqual(t, "/poisoned-default", secondDefault.Prefix,
"mutating the default config Prefix must not leak into a future call")
}
46 changes: 38 additions & 8 deletions pkg/registry/file/dynamicpathdetector/tests/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,44 @@ func BenchmarkAnalyzeOpensVsDeflateStringer(b *testing.B) {
})
}

// func BenchmarkCompareDynamic(b *testing.B) {
// dynamicPath := "/api/\u22ef/\u22ef"
// regularPath := "/api/users/123"
// for i := 0; i < b.N; i++ {
// _ = dynamicpathdetector.CompareDynamic(dynamicPath, regularPath)
// }
// b.ReportAllocs()
// }
// BenchmarkCompareDynamic exercises the matcher under realistic shapes.
// Run with -benchmem; the goal is the zero-alloc target Matthias called
// out on upstream PR #316. Until the perf rewrite lands the numbers
// document the current cost so regressions show up. Inputs are split
// across cases so the benchmark covers:
//
// - DynamicIdentifier-only paths (auto-generated short patterns)
// - DynamicIdentifier-only paths (deeper, more typical R0002 traffic)
// - WildcardIdentifier with mid-path zero-or-more semantics
// - Trailing WildcardIdentifier (the regression-prone path)
// - Anchored / unanchored boundary cases (the new `*` vs `/*` contract)
func BenchmarkCompareDynamic(b *testing.B) {
cases := []struct {
name string
dynamic string
regular string
}{
{"ellipsis_short", "/api/\u22ef/\u22ef", "/api/users/123"},
{"ellipsis_deep", "/api/\u22ef/\u22ef/\u22ef/\u22ef", "/api/users/123/posts/42"},
{"trailing_star", "/etc/*", "/etc/ssh/sshd_config"},
{"trailing_star_no_match_on_parent", "/etc/*", "/etc"},
{"mid_star_zero_consumed", "/a/*/b", "/a/b"},
{"mid_star_many_consumed", "/a/*/b", "/a/x/y/z/b"},
{"anchored_root_no_match", "/*", "/"},
{"unanchored_star_root", "*", "/"},
{"deep_literal_match", "/var/log/syslog", "/var/log/syslog"},
{"deep_literal_mismatch", "/var/log/syslog", "/var/log/messages"},
}
for _, c := range cases {
b.Run(c.name, func(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = dynamicpathdetector.CompareDynamic(c.dynamic, c.regular)
}
})
}
}

func generateMixedPaths(count int, fixedLength int) []string {
paths := make([]string, count)
Expand Down
Loading
Loading