diff --git a/lnd.go b/lnd.go index 76b08a114a1..007537c98f6 100644 --- a/lnd.go +++ b/lnd.go @@ -141,6 +141,22 @@ var errStreamIsolationWithProxySkip = errors.New( "while stream isolation is enabled, the TOR proxy may not be skipped", ) +// ErrShutdownRequested is returned from Main when lnd shuts down due to an +// internal error condition such as a backend health check failure. This +// ensures the process exits with a non-zero code. +var ErrShutdownRequested = errors.New("lnd was shut down due to an " + + "internal request (e.g. chain backend failure)") + +// handleShutdown returns ErrShutdownRequested if the shutdown was triggered +// internally (e.g. by a health check failure), or nil if it was triggered by +// an external OS signal. +func handleShutdown(interceptor signal.Interceptor) error { + if interceptor.ShutdownWasRequested() { + return ErrShutdownRequested + } + return nil +} + // Main is the true entry point for lnd. It accepts a fully populated and // validated main configuration struct and an optional listener config struct. // This function starts all main system components then blocks until a signal @@ -778,7 +794,7 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg, return mkErr("unable to start server", err) case <-interceptor.ShutdownChannel(): - return nil + return handleShutdown(interceptor) } // We transition the server state to Active, as the server is up. @@ -803,7 +819,7 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg, // Wait for shutdown signal from either a graceful server stop or from // the interrupt handler. <-interceptor.ShutdownChannel() - return nil + return handleShutdown(interceptor) } // bakeMacaroon creates a new macaroon with newest version and the given diff --git a/signal/signal.go b/signal/signal.go index 8b4a01485ce..49f4ccc08f6 100644 --- a/signal/signal.go +++ b/signal/signal.go @@ -111,6 +111,11 @@ type Interceptor struct { // close this channel. quit chan struct{} + // shutdownRequested is set to true when an internal shutdown is the + // first trigger (e.g. health check failure). Pointer-typed so the + // value is shared across copies of Interceptor. + shutdownRequested *atomic.Bool + // Notifier handles sending shutdown notifications. Notifier Notifier } @@ -127,6 +132,7 @@ func Intercept() (Interceptor, error) { shutdownChannel: make(chan struct{}), shutdownRequestChannel: make(chan struct{}), quit: make(chan struct{}), + shutdownRequested: &atomic.Bool{}, } signalsToCatch := []os.Signal{ @@ -179,6 +185,14 @@ func (c *Interceptor) mainInterruptHandler() { case <-c.shutdownRequestChannel: log.Infof("Received shutdown request.") + + // Only mark the shutdown as internally requested if + // this is the first shutdown trigger. This prevents + // a late internal request from overriding a + // user-initiated signal shutdown. + if !isShutdown { + c.shutdownRequested.Store(true) + } shutdown() case <-c.quit: @@ -222,6 +236,13 @@ func (c *Interceptor) RequestShutdown() { } } +// ShutdownWasRequested returns true if the shutdown was initiated internally +// via RequestShutdown (e.g. due to a health check failure) rather than by an +// external OS signal. +func (c *Interceptor) ShutdownWasRequested() bool { + return c.shutdownRequested.Load() +} + // ShutdownChannel returns the channel that will be closed once the main // interrupt handler has exited. func (c *Interceptor) ShutdownChannel() <-chan struct{} { diff --git a/signal/signal_test.go b/signal/signal_test.go new file mode 100644 index 00000000000..7982f5fcd94 --- /dev/null +++ b/signal/signal_test.go @@ -0,0 +1,86 @@ +package signal + +import ( + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// waitForShutdown waits for the interceptor's shutdown channel to close. +func waitForShutdown(t *testing.T, interceptor Interceptor) { + t.Helper() + + select { + case <-interceptor.ShutdownChannel(): + case <-time.After(time.Second): + t.Fatal("timed out waiting for shutdown channel") + } + + // Wait for mainInterruptHandler goroutine to fully exit and reset + // the global started flag so the next test can call Intercept(). + require.Eventually(t, func() bool { + return atomic.LoadInt32(&started) == 0 + }, time.Second, 10*time.Millisecond, "interceptor did not reset") +} + +// TestRequestShutdownSetsFlag tests that calling RequestShutdown sets the +// shutdownRequested flag, which is used to distinguish internal shutdown +// requests (e.g. chain backend failure) from external OS signals. +func TestRequestShutdownSetsFlag(t *testing.T) { + interceptor, err := Intercept() + require.NoError(t, err) + + // Before any shutdown, the flag should not be set. + require.False(t, interceptor.ShutdownWasRequested()) + + // Request an internal shutdown. + interceptor.RequestShutdown() + + waitForShutdown(t, interceptor) + + // The flag should now be set after the handler processed the request. + require.True(t, interceptor.ShutdownWasRequested()) +} + +// TestOSSignalDoesNotSetRequestedFlag tests that an OS signal based shutdown +// does not set the shutdownRequested flag, ensuring a clean exit code 0. +func TestOSSignalDoesNotSetRequestedFlag(t *testing.T) { + interceptor, err := Intercept() + require.NoError(t, err) + + // Simulate an OS signal. + interceptor.interruptChannel <- nil + + waitForShutdown(t, interceptor) + + // The flag should NOT be set for OS signal shutdowns. + require.False(t, interceptor.ShutdownWasRequested()) +} + +// TestOSSignalThenRequestShutdownNoFlag tests the race condition where a user +// sends SIGINT first, and then an internal component calls RequestShutdown. +// The first shutdown cause (OS signal) should win, so the flag must remain +// unset and the process should exit with code 0. +func TestOSSignalThenRequestShutdownNoFlag(t *testing.T) { + interceptor, err := Intercept() + require.NoError(t, err) + + // Simulate an OS signal to initiate shutdown first. + interceptor.interruptChannel <- nil + + // Wait for the shutdown process to start before sending the second + // request. This is more robust than a fixed sleep. + require.Eventually(t, func() bool { + return !interceptor.Alive() + }, time.Second, 10*time.Millisecond, "shutdown did not start") + + // Now an internal component also requests shutdown (late arrival). + interceptor.RequestShutdown() + + waitForShutdown(t, interceptor) + + // The flag must NOT be set -- the OS signal arrived first. + require.False(t, interceptor.ShutdownWasRequested()) +}