Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 43 additions & 16 deletions op-supernode/supernode/chain_container/chain_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,12 @@ type virtualNodeFactory func(cfg *opnodecfg.Config, log gethlog.Logger, initOver
type ResetCallback func(chainID eth.ChainID, timestamp uint64, invalidatedBlock eth.BlockRef)

type simpleChainContainer struct {
// vnMu protects vn. The chain container restart loop writes vn concurrently
// with activity goroutines (e.g. Interop.checkChainsReady) reading it, so an
// unsynchronized access produces a torn interface read: non-nil type with
// nil data pointer, which slips past a plain `vn == nil` check and panics
// on the next method dispatch.
vnMu sync.RWMutex
vn virtual_node.VirtualNode
vncfg *opnodecfg.Config
cfg config.CLIConfig
Expand Down Expand Up @@ -229,6 +235,22 @@ func defaultVirtualNodeFactory(cfg *opnodecfg.Config, log gethlog.Logger, initOv
return virtual_node.NewVirtualNode(cfg, log, initOverload, appVersion)
}

// getVN returns a consistent snapshot of c.vn. Always read c.vn through this
// helper; a bare read races with the restart loop's assignment.
func (c *simpleChainContainer) getVN() virtual_node.VirtualNode {
c.vnMu.RLock()
defer c.vnMu.RUnlock()
return c.vn
}

// setVN writes c.vn under the lock. Used by the restart loop when it installs
// a new virtual node on each iteration.
func (c *simpleChainContainer) setVN(vn virtual_node.VirtualNode) {
c.vnMu.Lock()
defer c.vnMu.Unlock()
c.vn = vn
}

func (c *simpleChainContainer) subPath(path string) string {
return filepath.Join(c.cfg.DataDir, c.chainID.String(), path)
}
Expand Down Expand Up @@ -263,7 +285,7 @@ func (c *simpleChainContainer) Start(ctx context.Context) error {
c.initOverload.SuperAuthority = c
}
// Pass in the chain container as a SuperAuthority
c.vn = c.virtualNodeFactory(c.vncfg, c.log, c.initOverload, c.appVersion, c)
c.setVN(c.virtualNodeFactory(c.vncfg, c.log, c.initOverload, c.appVersion, c))
if c.pause.Load() {
// Check for stop/cancellation even while paused, so teardown doesn't hang.
// Without this, a stuck pause (e.g. from RewindEngine exiting before Resume)
Expand All @@ -281,19 +303,20 @@ func (c *simpleChainContainer) Start(ctx context.Context) error {
}

// start the virtual node
err := c.vn.Start(ctx)
vn := c.getVN()
err := vn.Start(ctx)
if err != nil {
c.log.Warn("virtual node exited with error", "vn_id", c.vn, "error", err)
c.log.Warn("virtual node exited with error", "vn_id", vn, "error", err)
} else {
c.log.Info("virtual node exited", "vn_id", c.vn)
c.log.Info("virtual node exited", "vn_id", vn)
}

// always stop the virtual node after it exits
stopCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
if stopErr := c.vn.Stop(stopCtx); stopErr != nil {
if stopErr := vn.Stop(stopCtx); stopErr != nil {
c.log.Error("error stopping virtual node", "error", stopErr)
} else {
c.log.Info("virtual node stopped", "vn_id", c.vn)
c.log.Info("virtual node stopped", "vn_id", vn)
}

cancel()
Expand Down Expand Up @@ -322,8 +345,8 @@ func (c *simpleChainContainer) Stop(ctx context.Context) error {
c.rollupClient.Close()
}

if c.vn != nil {
if err := c.vn.Stop(stopCtx); err != nil {
if vn := c.getVN(); vn != nil {
if err := vn.Stop(stopCtx); err != nil {
c.log.Error("error stopping virtual node", "error", err)
}
}
Expand Down Expand Up @@ -397,13 +420,14 @@ func (c *simpleChainContainer) LocalSafeBlockAtTimestamp(ctx context.Context, ts

// SyncStatus returns the in-process op-node sync status for this chain.
func (c *simpleChainContainer) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) {
if c.vn == nil {
vn := c.getVN()
if vn == nil {
if c.log != nil {
c.log.Warn("SyncStatus: virtual node not initialized")
}
return nil, virtual_node.ErrVirtualNodeNotRunning
}
st, err := c.vn.SyncStatus(ctx)
st, err := vn.SyncStatus(ctx)
if err != nil {
return nil, err
}
Expand All @@ -424,7 +448,8 @@ func (c *simpleChainContainer) OutputRootAtL2BlockNumber(ctx context.Context, l2

// safeDBAtL2 delegates to the virtual node to resolve the earliest L1 at which the L2 became safe.
func (c *simpleChainContainer) safeDBAtL2(ctx context.Context, l2 eth.BlockID) (eth.BlockID, error) {
if c.vn == nil {
vn := c.getVN()
if vn == nil {
return eth.BlockID{}, fmt.Errorf("virtual node not initialized")
}
status, err := c.SyncStatus(ctx)
Expand All @@ -433,7 +458,7 @@ func (c *simpleChainContainer) safeDBAtL2(ctx context.Context, l2 eth.BlockID) (
}
currentL1 := status.CurrentL1
c.log.Debug("safeDBAtL2", "l2", l2, "currentL1", currentL1, "err", err)
l1, err := c.vn.L1AtSafeHead(ctx, l2)
l1, err := vn.L1AtSafeHead(ctx, l2)
if err != nil {
// Map L1AtSafeHeadNotFound to ethereum.NotFound so callers treat chain lag as "not ready"
if errors.Is(err, virtual_node.ErrL1AtSafeHeadNotFound) {
Expand Down Expand Up @@ -572,7 +597,8 @@ func (c *simpleChainContainer) RewindEngine(ctx context.Context, timestamp uint6
}
defer c.resetting.Store(false)

if c.vn == nil {
vn := c.getVN()
if vn == nil {
return fmt.Errorf("virtual node not initialized")
}
if c.engine == nil {
Expand All @@ -591,7 +617,7 @@ func (c *simpleChainContainer) RewindEngine(ctx context.Context, timestamp uint6
c.log.Info("chain_container/RewindEngine: paused container")

// stop the vn
err = c.vn.Stop(ctx)
err = vn.Stop(ctx)
if err != nil {
return err
}
Expand Down Expand Up @@ -643,10 +669,11 @@ func (c *simpleChainContainer) PauseAndStopVN(ctx context.Context) error {
if err := c.Pause(ctx); err != nil {
return err
}
if c.vn == nil {
vn := c.getVN()
if vn == nil {
return nil
}
return c.vn.Stop(ctx)
return vn.Stop(ctx)
}

// SetResetCallback sets a callback that is invoked when the chain resets.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ func NewVirtualNode(cfg *opnodecfg.Config, log gethlog.Logger, initOverload *rol
}

func (v *simpleVirtualNode) Start(ctx context.Context) error {
if v == nil {
return ErrVirtualNodeNotRunning
}
// Accquire lock while setting up inner node
v.mu.Lock()
if v.state != VNStateNotStarted {
Expand Down Expand Up @@ -174,6 +177,9 @@ func (v *simpleVirtualNode) Start(ctx context.Context) error {
}

func (v *simpleVirtualNode) Stop(ctx context.Context) error {
if v == nil {
return nil
}
v.mu.Lock()
defer v.mu.Unlock()

Expand All @@ -198,6 +204,9 @@ func (v *simpleVirtualNode) State() VNState {

// SafeHeadAtL1 returns the recorded mapping of L1 block -> L2 safe head at or before the given L1 block number.
func (v *simpleVirtualNode) SafeHeadAtL1(ctx context.Context, l1BlockNum uint64) (eth.BlockID, eth.BlockID, error) {
if v == nil {
return eth.BlockID{}, eth.BlockID{}, ErrVirtualNodeNotRunning
}
v.mu.Lock()
inner := v.inner
v.mu.Unlock()
Expand All @@ -216,6 +225,9 @@ var ErrL1AtSafeHeadNotFound = errors.New("l1 at safe head not found")
// L1AtSafeHead finds the earliest L1 block at which the provided L2 block became local safe,
// using the monotonicity of SafeDB (L2 safe head number is non-decreasing over L1).
func (v *simpleVirtualNode) L1AtSafeHead(ctx context.Context, target eth.BlockID) (eth.BlockID, error) {
if v == nil {
return eth.BlockID{}, ErrVirtualNodeNotRunning
}
Comment on lines +228 to +230
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking if self is nil is a weird pattern - we probably should just put these checks in the calling code.

v.mu.Lock()
inner := v.inner
v.mu.Unlock()
Expand Down Expand Up @@ -279,6 +291,9 @@ func (v *simpleVirtualNode) L1AtSafeHead(ctx context.Context, target eth.BlockID
}

func (v *simpleVirtualNode) SyncStatus(ctx context.Context) (*eth.SyncStatus, error) {
if v == nil {
return nil, ErrVirtualNodeNotRunning
}
v.mu.Lock()
inner := v.inner
v.mu.Unlock()
Expand Down
Loading