diff --git a/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go b/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go index d620e3083..a0eba25a4 100644 --- a/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go +++ b/pkg/registry/file/dynamicpathdetector/analyze_endpoints.go @@ -14,42 +14,23 @@ func isWildcardPort(port string) bool { return port == "0" } -func rewritePort(endpoint, wildcardPort string) string { - if wildcardPort == "" { - return endpoint - } - port, pathPart := splitEndpointPortAndPath(endpoint) - if !isWildcardPort(port) { - return ":" + wildcardPort + pathPart - } - return endpoint -} - func AnalyzeEndpoints(endpoints *[]types.HTTPEndpoint, analyzer *PathAnalyzer) []types.HTTPEndpoint { if len(*endpoints) == 0 { return nil } - // Detect wildcard port in input (port 0 means any port) - wildcardPort := "" - for _, ep := range *endpoints { - port, _ := splitEndpointPortAndPath(ep.Endpoint) - if isWildcardPort(port) { - wildcardPort = port - break - } - } - - // First pass: build tree, redirecting to wildcard port if needed + // First pass: build the analyzer trie from each endpoint's true (port, + // path) tuple. Each port keys a separate sub-tree, so :0/foo and + // :443/foo are analyzed independently — :443/foo is NOT rewritten to + // :0/foo just because some unrelated endpoint also uses :0. for _, endpoint := range *endpoints { - _, _ = AnalyzeURL(rewritePort(endpoint.Endpoint, wildcardPort), analyzer) + _, _ = AnalyzeURL(endpoint.Endpoint, analyzer) } - // Second pass: process endpoints + // Second pass: process endpoints with their original ports. var newEndpoints []*types.HTTPEndpoint for _, endpoint := range *endpoints { ep := endpoint - ep.Endpoint = rewritePort(ep.Endpoint, wildcardPort) processedEndpoint, err := ProcessEndpoint(&ep, analyzer, newEndpoints) if processedEndpoint == nil && err == nil || err != nil { continue @@ -57,6 +38,8 @@ func AnalyzeEndpoints(endpoints *[]types.HTTPEndpoint, analyzer *PathAnalyzer) [ newEndpoints = append(newEndpoints, processedEndpoint) } + // Cross-port folding happens here: only same-(path, direction) siblings + // of an explicit :0 wildcard get absorbed into it. newEndpoints = MergeDuplicateEndpoints(newEndpoints) return convertPointerToValueSlice(newEndpoints) @@ -125,6 +108,19 @@ func splitEndpointPortAndPath(endpoint string) (string, string) { return s[:idx], s[idx:] } +// MergeDuplicateEndpoints folds duplicates and merges same-path specific-port +// endpoints into a wildcard-port (:0) sibling. The folding is symmetric: +// +// - If a specific-port endpoint is encountered AFTER its :0 sibling, the +// specific-port methods/headers are merged INTO the wildcard entry. +// - If a specific-port endpoint is encountered BEFORE its :0 sibling, it +// is initially recorded; when the wildcard arrives we sweep `seen` for +// same-(path, direction) specific-port siblings, fold them into the +// wildcard, and remove them from the output. +// +// This contract was tightened on the back of upstream review on +// kubescape/storage#316 — a single :0 entry must NOT cause unrelated +// concrete-port endpoints to be wildcarded; only same-path siblings fold. func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpoint { seen := make(map[string]*types.HTTPEndpoint) var newEndpoints []*types.HTTPEndpoint @@ -137,15 +133,38 @@ func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpo continue } - // Check if a wildcard port variant already exists (port 0 means any port) port, pathPart := splitEndpointPortAndPath(endpoint.Endpoint) - if !isWildcardPort(port) { - wildcardKey := fmt.Sprintf(":%s%s|%s", "0", pathPart, endpoint.Direction) - if existing, found := seen[wildcardKey]; found { - existing.Methods = MergeStrings(existing.Methods, endpoint.Methods) - mergeHeaders(existing, endpoint) - continue + + if isWildcardPort(port) { + // Wildcard arriving after specific-port siblings — sweep `seen` + // for any same-(path, direction) specific-port entries already + // recorded, fold them into the wildcard, then drop them from + // the output slice. + absorbed := false + for k, e := range seen { + ePort, ePath := splitEndpointPortAndPath(e.Endpoint) + if isWildcardPort(ePort) || ePath != pathPart || e.Direction != endpoint.Direction { + continue + } + endpoint.Methods = MergeStrings(endpoint.Methods, e.Methods) + mergeHeaders(endpoint, e) + delete(seen, k) + newEndpoints = removeEndpoint(newEndpoints, e) + absorbed = true } + seen[key] = endpoint + newEndpoints = append(newEndpoints, endpoint) + _ = absorbed + continue + } + + // Specific port: if a wildcard sibling for the same (path, direction) + // is already in `seen`, fold this entry into it. + wildcardKey := fmt.Sprintf(":0%s|%s", pathPart, endpoint.Direction) + if existing, found := seen[wildcardKey]; found { + existing.Methods = MergeStrings(existing.Methods, endpoint.Methods) + mergeHeaders(existing, endpoint) + continue } seen[key] = endpoint @@ -155,6 +174,18 @@ func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpo return newEndpoints } +// removeEndpoint returns a new slice with the first occurrence of target +// removed (compared by pointer). Used by MergeDuplicateEndpoints when a +// previously-recorded specific-port entry is absorbed into a later wildcard. +func removeEndpoint(s []*types.HTTPEndpoint, target *types.HTTPEndpoint) []*types.HTTPEndpoint { + for i, e := range s { + if e == target { + return append(s[:i], s[i+1:]...) + } + } + return s +} + func getEndpointKey(endpoint *types.HTTPEndpoint) string { return fmt.Sprintf("%s|%s", endpoint.Endpoint, endpoint.Direction) } diff --git a/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go b/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go index 93172a1aa..ec7fe58a5 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go @@ -73,6 +73,12 @@ func TestAnalyzeEndpoints(t *testing.T) { }, }, { + // A single :0 (wildcard-port) entry MUST NOT contaminate + // unrelated concrete-port endpoints. Only same-(path, direction) + // siblings of an explicit :0 entry are folded into it; here the + // :80 and :8770 paths are distinct from the :0 path, so each + // endpoint stays on its own port. Regression test for the bug + // flagged in upstream review on kubescape/storage#316. name: "Test with 0 port", input: []types.HTTPEndpoint{ { @@ -88,10 +94,23 @@ func TestAnalyzeEndpoints(t *testing.T) { Methods: []string{"POST"}, }, }, + // NOTE: the analyzer trie persists across subtests in this + // table (analyzer is created outside the t.Run loop), so + // /users/\u22ef/posts/\u22ef has already been observed enough times + // for the :80 entry's trailing segment to be dynamicized; + // the :8770 path is fresh in the trie so it stays concrete. expected: []types.HTTPEndpoint{ { - Endpoint: ":0/users/\u22ef/posts/\u22ef", - Methods: []string{"GET", "POST"}, + Endpoint: ":0/users/123/posts/\u22ef", + Methods: []string{"GET"}, + }, + { + Endpoint: ":80/users/\u22ef/posts/\u22ef", + Methods: []string{"POST"}, + }, + { + Endpoint: ":8770/users/blub/posts/101", + Methods: []string{"POST"}, }, }, }, @@ -238,17 +257,21 @@ func TestAnalyzeEndpointsWithInvalidURL(t *testing.T) { assert.Equal(t, 0, len(result)) } -func TestAnalyzeEndpointsWildcardPortAbsorbsSpecificPort(t *testing.T) { +// TestAnalyzeEndpoints_WildcardDoesNotContaminateUnrelatedPaths pins the bug +// flagged by upstream review on kubescape/storage#316: a single wildcard-port +// endpoint must NOT cause unrelated specific-port endpoints (different path) +// to be rewritten to :0. Only same-path siblings should fold into the wildcard. +func TestAnalyzeEndpoints_WildcardDoesNotContaminateUnrelatedPaths(t *testing.T) { analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) input := []types.HTTPEndpoint{ { - Endpoint: ":0/users/123", + Endpoint: ":0/health", Methods: []string{"GET"}, Direction: "outbound", }, { - Endpoint: ":80/users/456", + Endpoint: ":443/login", Methods: []string{"POST"}, Direction: "outbound", }, @@ -256,34 +279,98 @@ func TestAnalyzeEndpointsWildcardPortAbsorbsSpecificPort(t *testing.T) { result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + endpoints := make(map[string]bool, len(result)) for _, ep := range result { - port := ep.Endpoint[:len(":0")] - assert.Equal(t, ":0", port, "endpoint %s should have wildcard port", ep.Endpoint) + endpoints[ep.Endpoint] = true } + assert.Equal(t, 2, len(result), "unrelated paths must not be merged: got %v", endpoints) + assert.True(t, endpoints[":0/health"], "wildcard endpoint :0/health must be preserved") + assert.True(t, endpoints[":443/login"], "specific-port endpoint :443/login must keep its port (no wildcard sibling on the same path)") } -func TestAnalyzeEndpointsWildcardPortAfterSpecificPorts(t *testing.T) { +// TestAnalyzeEndpoints_SamePathSpecificFirstThenWildcard exercises the +// reverse-order case: the specific-port endpoint comes first in the slice, +// then a wildcard sibling on the SAME path. The two must merge into the +// wildcard. Without symmetric merging in MergeDuplicateEndpoints, the +// specific endpoint sticks around alongside the wildcard. +func TestAnalyzeEndpoints_SamePathSpecificFirstThenWildcard(t *testing.T) { analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) input := []types.HTTPEndpoint{ { - Endpoint: ":80/api/data", + Endpoint: ":443/login", + Methods: []string{"POST"}, + Direction: "outbound", + }, + { + Endpoint: ":0/login", + Methods: []string{"GET"}, + Direction: "outbound", + }, + } + + result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + + assert.Equal(t, 1, len(result), "specific-port sibling must fold into the wildcard regardless of order") + assert.Equal(t, ":0/login", result[0].Endpoint) + assert.ElementsMatch(t, []string{"GET", "POST"}, result[0].Methods, "methods from both ports must be merged") +} + +// TestAnalyzeEndpoints_NoWildcardKeepsSpecificPort asserts that without ANY +// wildcard sibling, specific-port endpoints stay specific. A regression here +// would mean the analyzer is wildcarding too aggressively. +func TestAnalyzeEndpoints_NoWildcardKeepsSpecificPort(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) + + input := []types.HTTPEndpoint{ + { + Endpoint: ":443/login", + Methods: []string{"POST"}, + Direction: "outbound", + }, + } + + result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + + assert.Equal(t, 1, len(result)) + assert.Equal(t, ":443/login", result[0].Endpoint, "no wildcard sibling => port must be preserved") +} + +// TestAnalyzeEndpoints_OnlyMatchingPathsFoldIntoWildcard combines the +// wildcard-contamination case with the same-path-merge case to verify both +// invariants hold simultaneously. :0/api absorbs :80/api (same path); but +// :443/admin (different path, no wildcard sibling) keeps its port. +func TestAnalyzeEndpoints_OnlyMatchingPathsFoldIntoWildcard(t *testing.T) { + analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil) + + input := []types.HTTPEndpoint{ + { + Endpoint: ":0/api", Methods: []string{"GET"}, Direction: "outbound", }, { - Endpoint: ":0/api/info", + Endpoint: ":80/api", Methods: []string{"POST"}, Direction: "outbound", }, + { + Endpoint: ":443/admin", + Methods: []string{"DELETE"}, + Direction: "outbound", + }, } result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer) + endpoints := make(map[string][]string, len(result)) for _, ep := range result { - port := ep.Endpoint[:len(":0")] - assert.Equal(t, ":0", port, "endpoint %s should have wildcard port", ep.Endpoint) + endpoints[ep.Endpoint] = ep.Methods } + + assert.Equal(t, 2, len(result), "expected :0/api and :443/admin, got %v", endpoints) + assert.ElementsMatch(t, []string{"GET", "POST"}, endpoints[":0/api"], "/api siblings must merge into wildcard") + assert.ElementsMatch(t, []string{"DELETE"}, endpoints[":443/admin"], ":443/admin must NOT be wildcarded — no wildcard sibling on /admin") } func TestAnalyzeEndpointsMultiplePortsMergeIntoWildcard(t *testing.T) { @@ -332,3 +419,39 @@ func TestMergeDuplicateEndpointsWildcardPort(t *testing.T) { assert.Equal(t, ":0/api/data", result[0].Endpoint) assert.Equal(t, []string{"GET", "POST"}, result[0].Methods) } + +// TestMergeDuplicateEndpoints_SpecificFirstThenWildcard pins the reverse +// order — specific-port endpoint encountered first, wildcard sibling second. +// Without symmetric merging in MergeDuplicateEndpoints both entries survive, +// which CodeRabbit flagged on PR #316. Locking the contract here. +func TestMergeDuplicateEndpoints_SpecificFirstThenWildcard(t *testing.T) { + specificEP := &types.HTTPEndpoint{ + Endpoint: ":80/api/data", + Methods: []string{"POST"}, + Direction: "outbound", + } + wildcardEP := &types.HTTPEndpoint{ + Endpoint: ":0/api/data", + Methods: []string{"GET"}, + Direction: "outbound", + } + + result := dynamicpathdetector.MergeDuplicateEndpoints([]*types.HTTPEndpoint{specificEP, wildcardEP}) + + assert.Equal(t, 1, len(result), "wildcard sibling must absorb the earlier specific-port entry") + assert.Equal(t, ":0/api/data", result[0].Endpoint) + assert.ElementsMatch(t, []string{"GET", "POST"}, result[0].Methods) +} + +// TestMergeDuplicateEndpoints_NoWildcardKeepsAllSpecificPorts asserts that +// without a wildcard sibling, distinct (port,path) pairs all survive. +// A regression here would mean the merge is collapsing too aggressively. +func TestMergeDuplicateEndpoints_NoWildcardKeepsAllSpecificPorts(t *testing.T) { + a := &types.HTTPEndpoint{Endpoint: ":80/api/data", Methods: []string{"GET"}, Direction: "outbound"} + b := &types.HTTPEndpoint{Endpoint: ":443/api/data", Methods: []string{"POST"}, Direction: "outbound"} + c := &types.HTTPEndpoint{Endpoint: ":8080/api/data", Methods: []string{"PUT"}, Direction: "outbound"} + + result := dynamicpathdetector.MergeDuplicateEndpoints([]*types.HTTPEndpoint{a, b, c}) + + assert.Equal(t, 3, len(result), "no wildcard sibling => all specific-port endpoints must be kept") +}