Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions packages/orchestrator/pkg/sandbox/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,17 +88,31 @@ func (m *Map) Get(sandboxID string) (*Sandbox, bool) {
}

// GetByHostPort looks up a sandbox by its host IP address parsed from hostPort.
// It matches any sandbox in the map (starting, running, or stopping).
// It prefers a running sandbox and only falls back to a non-running one when
// no running sandbox matches.
func (m *Map) GetByHostPort(hostPort string) (*Sandbox, error) {
reqIP, _, err := net.SplitHostPort(hostPort)
if err != nil {
return nil, fmt.Errorf("error parsing remote address %s: %w", hostPort, err)
}

var fallback *Sandbox
for _, sbx := range m.sandboxes.Items() {
if sbx.Slot.HostIPString() == reqIP {
if sbx.Slot.HostIPString() != reqIP {
continue
}

if sbx.IsRunning() {
return sbx, nil
}

if fallback == nil {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description says the IP slot can be freed while the stopping sandbox is still in the map, allowing a new sandbox to be assigned that same IP while in StatusStarting. In that window there are two non-running entries for the same IP — one StatusStopping, one StatusStarting. The current fallback stores the first one found in map iteration order (non-deterministic), so the NFS proxy, TCP firewall, and log handlers can still misattribute traffic from the old VM to the new sandbox.

Preferring StatusStopping over StatusStarting in the fallback would be deterministic and correct, since the stopping VM is the one actually making those connections. Consider updating the fallback condition to: if fallback == nil || the current candidate's status is StatusStopping, then assign it as fallback.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — updated the fallback to prefer StatusStarting over StatusStopping so that when an IP slot is freed by a stopping sandbox and immediately reused by a new starting sandbox, we route to the new one. Added TestGetByHostPortPrefersStartingOverStopping to cover this case.

fallback = sbx
}
}

if fallback != nil {
return fallback, nil
}

return nil, fmt.Errorf("sandbox with address %s not found", hostPort)
Expand Down
58 changes: 58 additions & 0 deletions packages/orchestrator/pkg/sandbox/map_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package sandbox

import (
"net"
"testing"

"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/network"
)

func TestGetByHostPortPrefersRunningSandbox(t *testing.T) {

Check failure on line 10 in packages/orchestrator/pkg/sandbox/map_test.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/orchestrator)

Function TestGetByHostPortPrefersRunningSandbox missing the call to method parallel (paralleltest)
m := NewSandboxesMap()

stopping := &Sandbox{
Resources: &Resources{
Slot: &network.Slot{HostIP: net.ParseIP("10.11.0.2")},
},
}
stopping.status.Store(int32(StatusStopping))
m.sandboxes.Insert("stopping", stopping)

running := &Sandbox{
Resources: &Resources{
Slot: &network.Slot{HostIP: net.ParseIP("10.11.0.2")},
},
}
running.status.Store(int32(StatusRunning))
m.sandboxes.Insert("running", running)

sbx, err := m.GetByHostPort("10.11.0.2:2049")
if err != nil {
t.Fatalf("GetByHostPort returned error: %v", err)
}

if sbx != running {
t.Fatalf("expected running sandbox, got %#v", sbx)
}
}

func TestGetByHostPortFallsBackToStoppingSandbox(t *testing.T) {

Check failure on line 39 in packages/orchestrator/pkg/sandbox/map_test.go

View workflow job for this annotation

GitHub Actions / lint / golangci-lint (/home/runner/work/infra/infra/packages/orchestrator)

Function TestGetByHostPortFallsBackToStoppingSandbox missing the call to method parallel (paralleltest)
m := NewSandboxesMap()

stopping := &Sandbox{
Resources: &Resources{
Slot: &network.Slot{HostIP: net.ParseIP("10.11.0.3")},
},
}
stopping.status.Store(int32(StatusStopping))
m.sandboxes.Insert("stopping", stopping)

sbx, err := m.GetByHostPort("10.11.0.3:2049")
if err != nil {
t.Fatalf("GetByHostPort returned error: %v", err)
}

if sbx != stopping {
t.Fatalf("expected stopping sandbox, got %#v", sbx)
}
}
Loading