From aa98516679161e4003d4bc56b101717026f06d8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Salazar?= Date: Sat, 18 Apr 2026 01:50:39 -0600 Subject: [PATCH] tx: detect cycles in forEachPageInternal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit forEachPageInternal walks a branch page's children without tracking pages it has already entered, so a corrupted db whose branch references one of its own ancestors drives the goroutine stack to overflow. Thread a visited set through the recursion and bail on a revisit; fn still fires on the back- edge so verifyPageReachable's "multiple references" diagnostic is emitted once per cycle. Related to #701 / #581. Signed-off-by: Iván Salazar --- tx.go | 17 +++++++--- tx_whitebox_test.go | 80 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 tx_whitebox_test.go diff --git a/tx.go b/tx.go index aa0066bd3..aafccfb28 100644 --- a/tx.go +++ b/tx.go @@ -645,20 +645,29 @@ func (tx *Tx) page(id common.Pgid) *common.Page { func (tx *Tx) forEachPage(pgidnum common.Pgid, fn func(*common.Page, int, []common.Pgid)) { stack := make([]common.Pgid, 10) stack[0] = pgidnum - tx.forEachPageInternal(stack[:1], fn) + tx.forEachPageInternal(stack[:1], map[common.Pgid]struct{}{}, fn) } -func (tx *Tx) forEachPageInternal(pgidstack []common.Pgid, fn func(*common.Page, int, []common.Pgid)) { - p := tx.page(pgidstack[len(pgidstack)-1]) +func (tx *Tx) forEachPageInternal(pgidstack []common.Pgid, visited map[common.Pgid]struct{}, fn func(*common.Page, int, []common.Pgid)) { + pgid := pgidstack[len(pgidstack)-1] + p := tx.page(pgid) // Execute function. fn(p, len(pgidstack)-1, pgidstack) + // Stop descending on a revisit so a corrupted db with a page cycle + // cannot drive this recursion to stack overflow. fn still runs above, + // so verifyPageReachable's "multiple references" diagnostic fires. + if _, ok := visited[pgid]; ok { + return + } + visited[pgid] = struct{}{} + // Recursively loop over children. if p.IsBranchPage() { for i := 0; i < int(p.Count()); i++ { elem := p.BranchPageElement(uint16(i)) - tx.forEachPageInternal(append(pgidstack, elem.Pgid()), fn) + tx.forEachPageInternal(append(pgidstack, elem.Pgid()), visited, fn) } } } diff --git a/tx_whitebox_test.go b/tx_whitebox_test.go new file mode 100644 index 000000000..e22ea608c --- /dev/null +++ b/tx_whitebox_test.go @@ -0,0 +1,80 @@ +package bbolt + +import ( + "fmt" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "go.etcd.io/bbolt/internal/common" + "go.etcd.io/bbolt/internal/guts_cli" + "go.etcd.io/bbolt/internal/surgeon" +) + +// TestTx_forEachPage_CycleTerminates corrupts a db so that a branch page's +// child list loops back on itself, then verifies that tx.forEachPage +// terminates instead of recursing until the goroutine stack overflows. +// See issue #701 for the real-world corruption pattern. +func TestTx_forEachPage_CycleTerminates(t *testing.T) { + path := filepath.Join(t.TempDir(), "db") + db, err := Open(path, 0600, nil) + require.NoError(t, err) + require.NoError(t, db.Update(func(tx *Tx) error { + b, err := tx.CreateBucketIfNotExists([]byte("data")) + if err != nil { + return err + } + for i := 0; i < 500; i++ { + if err := b.Put([]byte(fmt.Sprintf("%04d", i)), make([]byte, 100)); err != nil { + return err + } + } + return nil + })) + require.NoError(t, db.Close()) + + // Pick a branch ancestor and one of its leaf descendants; overwriting the + // leaf with a copy of the branch leaves the leaf as a branch whose child + // list still references the leaf's own pgid, forming a cycle. + xray := surgeon.NewXRay(path) + paths, err := xray.FindPathsToKey([]byte("0001")) + require.NoError(t, err) + require.NotEmpty(t, paths) + p0 := paths[0] + require.GreaterOrEqual(t, len(p0), 2, "need at least one branch above the leaf") + ancestor := p0[len(p0)-2] + leaf := p0[len(p0)-1] + require.NoError(t, surgeon.CopyPage(path, ancestor, leaf)) + + ancestorPage, _, err := guts_cli.ReadPage(path, uint64(ancestor)) + require.NoError(t, err) + require.True(t, ancestorPage.IsBranchPage()) + var hasLeafAsChild bool + for i := uint16(0); i < ancestorPage.Count(); i++ { + if ancestorPage.BranchPageElement(i).Pgid() == common.Pgid(leaf) { + hasLeafAsChild = true + break + } + } + require.True(t, hasLeafAsChild, "expected ancestor to reference the leaf directly") + + db, err = Open(path, 0600, nil) + require.NoError(t, err) + defer func() { _ = db.Close() }() + + require.NoError(t, db.View(func(tx *Tx) error { + rootPage := tx.Bucket([]byte("data")).RootPage() + var count int + leafVisits := 0 + tx.forEachPage(rootPage, func(p *common.Page, _ int, _ []common.Pgid) { + count++ + if p.Id() == common.Pgid(leaf) { + leafVisits++ + } + }) + require.Less(t, count, 10_000, "forEachPage walked %d pages; cycle detection likely missing", count) + require.GreaterOrEqual(t, leafVisits, 2, "expected fn to fire on the cycle back-edge so duplicate-reference diagnostics still run") + return nil + })) +}