Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 63 additions & 32 deletions pkg/registry/file/dynamicpathdetector/analyze_endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,49 +14,32 @@ func isWildcardPort(port string) bool {
return port == "0"
}

func rewritePort(endpoint, wildcardPort string) string {
if wildcardPort == "" {
return endpoint
}
port, pathPart := splitEndpointPortAndPath(endpoint)
if !isWildcardPort(port) {
return ":" + wildcardPort + pathPart
}
return endpoint
}

func AnalyzeEndpoints(endpoints *[]types.HTTPEndpoint, analyzer *PathAnalyzer) []types.HTTPEndpoint {
if len(*endpoints) == 0 {
return nil
}

// Detect wildcard port in input (port 0 means any port)
wildcardPort := ""
for _, ep := range *endpoints {
port, _ := splitEndpointPortAndPath(ep.Endpoint)
if isWildcardPort(port) {
wildcardPort = port
break
}
}

// First pass: build tree, redirecting to wildcard port if needed
// First pass: build the analyzer trie from each endpoint's true (port,
// path) tuple. Each port keys a separate sub-tree, so :0/foo and
// :443/foo are analyzed independently — :443/foo is NOT rewritten to
// :0/foo just because some unrelated endpoint also uses :0.
for _, endpoint := range *endpoints {
_, _ = AnalyzeURL(rewritePort(endpoint.Endpoint, wildcardPort), analyzer)
_, _ = AnalyzeURL(endpoint.Endpoint, analyzer)
}

// Second pass: process endpoints
// Second pass: process endpoints with their original ports.
var newEndpoints []*types.HTTPEndpoint
for _, endpoint := range *endpoints {
ep := endpoint
ep.Endpoint = rewritePort(ep.Endpoint, wildcardPort)
processedEndpoint, err := ProcessEndpoint(&ep, analyzer, newEndpoints)
if processedEndpoint == nil && err == nil || err != nil {
continue
}
newEndpoints = append(newEndpoints, processedEndpoint)
}

// Cross-port folding happens here: only same-(path, direction) siblings
// of an explicit :0 wildcard get absorbed into it.
newEndpoints = MergeDuplicateEndpoints(newEndpoints)

return convertPointerToValueSlice(newEndpoints)
Expand Down Expand Up @@ -125,6 +108,19 @@ func splitEndpointPortAndPath(endpoint string) (string, string) {
return s[:idx], s[idx:]
}

// MergeDuplicateEndpoints folds duplicates and merges same-path specific-port
// endpoints into a wildcard-port (:0) sibling. The folding is symmetric:
//
// - If a specific-port endpoint is encountered AFTER its :0 sibling, the
// specific-port methods/headers are merged INTO the wildcard entry.
// - If a specific-port endpoint is encountered BEFORE its :0 sibling, it
// is initially recorded; when the wildcard arrives we sweep `seen` for
// same-(path, direction) specific-port siblings, fold them into the
// wildcard, and remove them from the output.
//
// This contract was tightened on the back of upstream review on
// kubescape/storage#316 — a single :0 entry must NOT cause unrelated
// concrete-port endpoints to be wildcarded; only same-path siblings fold.
func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpoint {
seen := make(map[string]*types.HTTPEndpoint)
var newEndpoints []*types.HTTPEndpoint
Expand All @@ -137,15 +133,38 @@ func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpo
continue
}

// Check if a wildcard port variant already exists (port 0 means any port)
port, pathPart := splitEndpointPortAndPath(endpoint.Endpoint)
if !isWildcardPort(port) {
wildcardKey := fmt.Sprintf(":%s%s|%s", "0", pathPart, endpoint.Direction)
if existing, found := seen[wildcardKey]; found {
existing.Methods = MergeStrings(existing.Methods, endpoint.Methods)
mergeHeaders(existing, endpoint)
continue

if isWildcardPort(port) {
// Wildcard arriving after specific-port siblings — sweep `seen`
// for any same-(path, direction) specific-port entries already
// recorded, fold them into the wildcard, then drop them from
// the output slice.
absorbed := false
for k, e := range seen {
ePort, ePath := splitEndpointPortAndPath(e.Endpoint)
if isWildcardPort(ePort) || ePath != pathPart || e.Direction != endpoint.Direction {
continue
}
endpoint.Methods = MergeStrings(endpoint.Methods, e.Methods)
mergeHeaders(endpoint, e)
delete(seen, k)
newEndpoints = removeEndpoint(newEndpoints, e)
absorbed = true
}
seen[key] = endpoint
newEndpoints = append(newEndpoints, endpoint)
_ = absorbed
continue
}

// Specific port: if a wildcard sibling for the same (path, direction)
// is already in `seen`, fold this entry into it.
wildcardKey := fmt.Sprintf(":0%s|%s", pathPart, endpoint.Direction)
if existing, found := seen[wildcardKey]; found {
existing.Methods = MergeStrings(existing.Methods, endpoint.Methods)
mergeHeaders(existing, endpoint)
continue
}

seen[key] = endpoint
Expand All @@ -155,6 +174,18 @@ func MergeDuplicateEndpoints(endpoints []*types.HTTPEndpoint) []*types.HTTPEndpo
return newEndpoints
}

// removeEndpoint returns a new slice with the first occurrence of target
// removed (compared by pointer). Used by MergeDuplicateEndpoints when a
// previously-recorded specific-port entry is absorbed into a later wildcard.
func removeEndpoint(s []*types.HTTPEndpoint, target *types.HTTPEndpoint) []*types.HTTPEndpoint {
for i, e := range s {
if e == target {
return append(s[:i], s[i+1:]...)
}
}
return s
}

func getEndpointKey(endpoint *types.HTTPEndpoint) string {
return fmt.Sprintf("%s|%s", endpoint.Endpoint, endpoint.Direction)
}
Expand Down
147 changes: 135 additions & 12 deletions pkg/registry/file/dynamicpathdetector/tests/analyze_endpoints_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ func TestAnalyzeEndpoints(t *testing.T) {
},
},
{
// A single :0 (wildcard-port) entry MUST NOT contaminate
// unrelated concrete-port endpoints. Only same-(path, direction)
// siblings of an explicit :0 entry are folded into it; here the
// :80 and :8770 paths are distinct from the :0 path, so each
// endpoint stays on its own port. Regression test for the bug
// flagged in upstream review on kubescape/storage#316.
name: "Test with 0 port",
input: []types.HTTPEndpoint{
{
Expand All @@ -88,10 +94,23 @@ func TestAnalyzeEndpoints(t *testing.T) {
Methods: []string{"POST"},
},
},
// NOTE: the analyzer trie persists across subtests in this
// table (analyzer is created outside the t.Run loop), so
// /users/\u22ef/posts/\u22ef has already been observed enough times
// for the :80 entry's trailing segment to be dynamicized;
// the :8770 path is fresh in the trie so it stays concrete.
expected: []types.HTTPEndpoint{
{
Endpoint: ":0/users/\u22ef/posts/\u22ef",
Methods: []string{"GET", "POST"},
Endpoint: ":0/users/123/posts/\u22ef",
Methods: []string{"GET"},
},
{
Endpoint: ":80/users/\u22ef/posts/\u22ef",
Methods: []string{"POST"},
},
{
Endpoint: ":8770/users/blub/posts/101",
Methods: []string{"POST"},
},
},
},
Expand Down Expand Up @@ -238,52 +257,120 @@ func TestAnalyzeEndpointsWithInvalidURL(t *testing.T) {
assert.Equal(t, 0, len(result))
}

func TestAnalyzeEndpointsWildcardPortAbsorbsSpecificPort(t *testing.T) {
// TestAnalyzeEndpoints_WildcardDoesNotContaminateUnrelatedPaths pins the bug
// flagged by upstream review on kubescape/storage#316: a single wildcard-port
// endpoint must NOT cause unrelated specific-port endpoints (different path)
// to be rewritten to :0. Only same-path siblings should fold into the wildcard.
func TestAnalyzeEndpoints_WildcardDoesNotContaminateUnrelatedPaths(t *testing.T) {
analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil)

input := []types.HTTPEndpoint{
{
Endpoint: ":0/users/123",
Endpoint: ":0/health",
Methods: []string{"GET"},
Direction: "outbound",
},
{
Endpoint: ":80/users/456",
Endpoint: ":443/login",
Methods: []string{"POST"},
Direction: "outbound",
},
}

result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer)

endpoints := make(map[string]bool, len(result))
for _, ep := range result {
port := ep.Endpoint[:len(":0")]
assert.Equal(t, ":0", port, "endpoint %s should have wildcard port", ep.Endpoint)
endpoints[ep.Endpoint] = true
}
assert.Equal(t, 2, len(result), "unrelated paths must not be merged: got %v", endpoints)
assert.True(t, endpoints[":0/health"], "wildcard endpoint :0/health must be preserved")
assert.True(t, endpoints[":443/login"], "specific-port endpoint :443/login must keep its port (no wildcard sibling on the same path)")
}

func TestAnalyzeEndpointsWildcardPortAfterSpecificPorts(t *testing.T) {
// TestAnalyzeEndpoints_SamePathSpecificFirstThenWildcard exercises the
// reverse-order case: the specific-port endpoint comes first in the slice,
// then a wildcard sibling on the SAME path. The two must merge into the
// wildcard. Without symmetric merging in MergeDuplicateEndpoints, the
// specific endpoint sticks around alongside the wildcard.
func TestAnalyzeEndpoints_SamePathSpecificFirstThenWildcard(t *testing.T) {
analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil)

input := []types.HTTPEndpoint{
{
Endpoint: ":80/api/data",
Endpoint: ":443/login",
Methods: []string{"POST"},
Direction: "outbound",
},
{
Endpoint: ":0/login",
Methods: []string{"GET"},
Direction: "outbound",
},
}

result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer)

assert.Equal(t, 1, len(result), "specific-port sibling must fold into the wildcard regardless of order")
assert.Equal(t, ":0/login", result[0].Endpoint)
assert.ElementsMatch(t, []string{"GET", "POST"}, result[0].Methods, "methods from both ports must be merged")
}

// TestAnalyzeEndpoints_NoWildcardKeepsSpecificPort asserts that without ANY
// wildcard sibling, specific-port endpoints stay specific. A regression here
// would mean the analyzer is wildcarding too aggressively.
func TestAnalyzeEndpoints_NoWildcardKeepsSpecificPort(t *testing.T) {
analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil)

input := []types.HTTPEndpoint{
{
Endpoint: ":443/login",
Methods: []string{"POST"},
Direction: "outbound",
},
}

result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer)

assert.Equal(t, 1, len(result))
assert.Equal(t, ":443/login", result[0].Endpoint, "no wildcard sibling => port must be preserved")
}

// TestAnalyzeEndpoints_OnlyMatchingPathsFoldIntoWildcard combines the
// wildcard-contamination case with the same-path-merge case to verify both
// invariants hold simultaneously. :0/api absorbs :80/api (same path); but
// :443/admin (different path, no wildcard sibling) keeps its port.
func TestAnalyzeEndpoints_OnlyMatchingPathsFoldIntoWildcard(t *testing.T) {
analyzer := dynamicpathdetector.NewPathAnalyzerWithConfigs(dynamicpathdetector.EndpointDynamicThreshold, nil)

input := []types.HTTPEndpoint{
{
Endpoint: ":0/api",
Methods: []string{"GET"},
Direction: "outbound",
},
{
Endpoint: ":0/api/info",
Endpoint: ":80/api",
Methods: []string{"POST"},
Direction: "outbound",
},
{
Endpoint: ":443/admin",
Methods: []string{"DELETE"},
Direction: "outbound",
},
}

result := dynamicpathdetector.AnalyzeEndpoints(&input, analyzer)

endpoints := make(map[string][]string, len(result))
for _, ep := range result {
port := ep.Endpoint[:len(":0")]
assert.Equal(t, ":0", port, "endpoint %s should have wildcard port", ep.Endpoint)
endpoints[ep.Endpoint] = ep.Methods
}

assert.Equal(t, 2, len(result), "expected :0/api and :443/admin, got %v", endpoints)
assert.ElementsMatch(t, []string{"GET", "POST"}, endpoints[":0/api"], "/api siblings must merge into wildcard")
assert.ElementsMatch(t, []string{"DELETE"}, endpoints[":443/admin"], ":443/admin must NOT be wildcarded — no wildcard sibling on /admin")
}

func TestAnalyzeEndpointsMultiplePortsMergeIntoWildcard(t *testing.T) {
Expand Down Expand Up @@ -332,3 +419,39 @@ func TestMergeDuplicateEndpointsWildcardPort(t *testing.T) {
assert.Equal(t, ":0/api/data", result[0].Endpoint)
assert.Equal(t, []string{"GET", "POST"}, result[0].Methods)
}

// TestMergeDuplicateEndpoints_SpecificFirstThenWildcard pins the reverse
// order — specific-port endpoint encountered first, wildcard sibling second.
// Without symmetric merging in MergeDuplicateEndpoints both entries survive,
// which CodeRabbit flagged on PR #316. Locking the contract here.
func TestMergeDuplicateEndpoints_SpecificFirstThenWildcard(t *testing.T) {
specificEP := &types.HTTPEndpoint{
Endpoint: ":80/api/data",
Methods: []string{"POST"},
Direction: "outbound",
}
wildcardEP := &types.HTTPEndpoint{
Endpoint: ":0/api/data",
Methods: []string{"GET"},
Direction: "outbound",
}

result := dynamicpathdetector.MergeDuplicateEndpoints([]*types.HTTPEndpoint{specificEP, wildcardEP})

assert.Equal(t, 1, len(result), "wildcard sibling must absorb the earlier specific-port entry")
assert.Equal(t, ":0/api/data", result[0].Endpoint)
assert.ElementsMatch(t, []string{"GET", "POST"}, result[0].Methods)
}

// TestMergeDuplicateEndpoints_NoWildcardKeepsAllSpecificPorts asserts that
// without a wildcard sibling, distinct (port,path) pairs all survive.
// A regression here would mean the merge is collapsing too aggressively.
func TestMergeDuplicateEndpoints_NoWildcardKeepsAllSpecificPorts(t *testing.T) {
a := &types.HTTPEndpoint{Endpoint: ":80/api/data", Methods: []string{"GET"}, Direction: "outbound"}
b := &types.HTTPEndpoint{Endpoint: ":443/api/data", Methods: []string{"POST"}, Direction: "outbound"}
c := &types.HTTPEndpoint{Endpoint: ":8080/api/data", Methods: []string{"PUT"}, Direction: "outbound"}

result := dynamicpathdetector.MergeDuplicateEndpoints([]*types.HTTPEndpoint{a, b, c})

assert.Equal(t, 3, len(result), "no wildcard sibling => all specific-port endpoints must be kept")
}
Loading