diff --git a/packages/orchestrator/go.mod b/packages/orchestrator/go.mod index 50ba9be7f5..23a4b4ea9e 100644 --- a/packages/orchestrator/go.mod +++ b/packages/orchestrator/go.mod @@ -17,6 +17,7 @@ require ( cloud.google.com/go/storage v1.59.2 connectrpc.com/connect v1.18.1 github.com/Merovius/nbd v0.0.0-20240812113926-fd65a54c9949 + github.com/RoaringBitmap/roaring/v2 v2.18.0 github.com/aws/aws-sdk-go-v2/config v1.32.6 github.com/aws/aws-sdk-go-v2/credentials v1.19.6 github.com/aws/aws-sdk-go-v2/service/ecr v1.44.0 @@ -97,7 +98,6 @@ require ( github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect - github.com/RoaringBitmap/roaring/v2 v2.16.1 // indirect github.com/andybalholm/brotli v1.2.0 // indirect github.com/armon/go-metrics v0.4.1 // indirect github.com/aws/aws-sdk-go-v2 v1.41.0 // indirect diff --git a/packages/orchestrator/go.sum b/packages/orchestrator/go.sum index 284278c0c7..da4ca56966 100644 --- a/packages/orchestrator/go.sum +++ b/packages/orchestrator/go.sum @@ -112,8 +112,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/RoaringBitmap/roaring/v2 v2.16.1 h1:HmKj5kwyUJVvCuaM8BdLSHVlYn3O9hzgSpaog6bSxmY= -github.com/RoaringBitmap/roaring/v2 v2.16.1/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4= +github.com/RoaringBitmap/roaring/v2 v2.18.0 h1:h7sS0VqCkfBMGgcHaudJFB4FE6Td71H6svRB2poRnGY= +github.com/RoaringBitmap/roaring/v2 v2.18.0/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= diff --git a/packages/orchestrator/pkg/sandbox/block/cache.go b/packages/orchestrator/pkg/sandbox/block/cache.go index 9e82c653d7..a757e04bc8 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache.go +++ b/packages/orchestrator/pkg/sandbox/block/cache.go @@ -13,16 +13,16 @@ import ( "syscall" "time" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/edsrzf/mmap-go" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.uber.org/zap" "golang.org/x/sys/unix" - "github.com/e2b-dev/infra/packages/shared/pkg/atomicbitset" "github.com/e2b-dev/infra/packages/shared/pkg/logger" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" + "github.com/e2b-dev/infra/packages/shared/pkg/syncroaring" "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" ) @@ -53,7 +53,7 @@ type Cache struct { blockSize int64 mmap *mmap.MMap mu sync.RWMutex - dirty *atomicbitset.Bitset + dirty *syncroaring.Bitset dirtyFile bool closed atomic.Bool } @@ -72,7 +72,7 @@ func NewCache(size, blockSize int64, filePath string, dirtyFile bool) (*Cache, e size: size, blockSize: blockSize, dirtyFile: dirtyFile, - dirty: atomicbitset.New(), + dirty: syncroaring.New(), }, nil } @@ -97,7 +97,7 @@ func NewCache(size, blockSize int64, filePath string, dirtyFile bool) (*Cache, e size: size, blockSize: blockSize, dirtyFile: dirtyFile, - dirty: atomicbitset.New(), + dirty: syncroaring.New(), }, nil } @@ -117,7 +117,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet } if c.mmap == nil { - return header.NewDiffMetadata(c.blockSize, bitset.New(0)), nil + return header.NewDiffMetadata(c.blockSize, roaring.New()), nil } f, err := os.Open(c.filePath) @@ -136,7 +136,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet logger.L().Warn(ctx, "error syncing file", zap.Error(err)) } - diffMetadata := header.NewDiffMetadata(c.blockSize, c.dirty.BitSet()) + diffMetadata := header.NewDiffMetadata(c.blockSize, c.dirty.Clone()) dst := int(out.Fd()) var writeOffset int64 @@ -194,7 +194,7 @@ func (c *Cache) ExportToDiff(ctx context.Context, out *os.File) (*header.DiffMet telemetry.SetAttributes(ctx, attribute.Int64("copy_ms", time.Since(copyStart).Milliseconds()), attribute.Int64("total_size_bytes", c.size), - attribute.Int64("dirty_size_bytes", int64(diffMetadata.Dirty.Count())*c.blockSize), + attribute.Int64("dirty_size_bytes", int64(diffMetadata.Dirty.GetCardinality())*c.blockSize), attribute.Int64("total_ranges", totalRanges), ) diff --git a/packages/orchestrator/pkg/sandbox/block/cache_test.go b/packages/orchestrator/pkg/sandbox/block/cache_test.go index 25e8974596..bf44d52abe 100644 --- a/packages/orchestrator/pkg/sandbox/block/cache_test.go +++ b/packages/orchestrator/pkg/sandbox/block/cache_test.go @@ -257,8 +257,8 @@ func TestCacheExportToDiff_ZeroDirtyBlockEmittedAsDirtyPayload(t *testing.T) { diffMetadata, err := cache.ExportToDiff(t.Context(), out) require.NoError(t, err) - require.EqualValues(t, 1, diffMetadata.Dirty.Count(), "zero-filled dirty block should be emitted as dirty payload") - require.EqualValues(t, 0, diffMetadata.Empty.Count(), "zero-filled dirty block should not be tracked in empty metadata") + require.EqualValues(t, 1, diffMetadata.Dirty.GetCardinality(), "zero-filled dirty block should be emitted as dirty payload") + require.EqualValues(t, 0, diffMetadata.Empty.GetCardinality(), "zero-filled dirty block should not be tracked in empty metadata") stat, err := out.Stat() require.NoError(t, err) @@ -335,8 +335,8 @@ func TestCacheExportToDiff_MixedDirtyBlocksKeepsZeroBlockInDiff(t *testing.T) { diffMetadata, err := cache.ExportToDiff(t.Context(), out) require.NoError(t, err) - require.EqualValues(t, 2, diffMetadata.Dirty.Count()) - require.EqualValues(t, 0, diffMetadata.Empty.Count(), "mixed export should still skip empty tracking for zero-filled dirty blocks") + require.EqualValues(t, 2, diffMetadata.Dirty.GetCardinality()) + require.EqualValues(t, 0, diffMetadata.Empty.GetCardinality(), "mixed export should still skip empty tracking for zero-filled dirty blocks") _, err = out.Seek(0, io.SeekStart) require.NoError(t, err) @@ -399,10 +399,10 @@ func TestCacheExportToDiff_NonContiguousDirtyBlocksPreserveRangeOrder(t *testing diffMetadata, err := cache.ExportToDiff(t.Context(), out) require.NoError(t, err) - require.EqualValues(t, 2, diffMetadata.Dirty.Count()) - require.True(t, diffMetadata.Dirty.Test(0)) - require.True(t, diffMetadata.Dirty.Test(3)) - require.EqualValues(t, 0, diffMetadata.Empty.Count()) + require.EqualValues(t, 2, diffMetadata.Dirty.GetCardinality()) + require.True(t, diffMetadata.Dirty.Contains(0)) + require.True(t, diffMetadata.Dirty.Contains(3)) + require.EqualValues(t, 0, diffMetadata.Empty.GetCardinality()) _, err = out.Seek(0, io.SeekStart) require.NoError(t, err) diff --git a/packages/orchestrator/pkg/sandbox/block/range.go b/packages/orchestrator/pkg/sandbox/block/range.go index 6a18ca5192..7d86488a3e 100644 --- a/packages/orchestrator/pkg/sandbox/block/range.go +++ b/packages/orchestrator/pkg/sandbox/block/range.go @@ -3,7 +3,7 @@ package block import ( "iter" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) @@ -36,24 +36,13 @@ func NewRangeFromBlocks(startIdx, numberOfBlocks, blockSize int64) Range { } } -// bitsetRanges returns a sequence of the ranges of the set bits of the bitset. -func BitsetRanges(b *bitset.BitSet, blockSize int64) iter.Seq[Range] { +// BitsetRanges returns a sequence of the ranges of the set bits of the bitmap. +func BitsetRanges(b *roaring.Bitmap, blockSize int64) iter.Seq[Range] { return func(yield func(Range) bool) { - start, found := b.NextSet(0) - - for found { - end, endOk := b.NextClear(start) - if !endOk { - yield(NewRangeFromBlocks(int64(start), int64(b.Len()-start), blockSize)) - - return - } - - if !yield(NewRangeFromBlocks(int64(start), int64(end-start), blockSize)) { + for start, endExcl := range b.Ranges() { + if !yield(NewRangeFromBlocks(int64(start), int64(endExcl)-int64(start), blockSize)) { return } - - start, found = b.NextSet(end + 1) } } } diff --git a/packages/orchestrator/pkg/sandbox/block/range_test.go b/packages/orchestrator/pkg/sandbox/block/range_test.go index 8e23163139..3e2f88681f 100644 --- a/packages/orchestrator/pkg/sandbox/block/range_test.go +++ b/packages/orchestrator/pkg/sandbox/block/range_test.go @@ -5,7 +5,7 @@ import ( "slices" "testing" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -290,7 +290,7 @@ func TestRange_Offsets_Iteration(t *testing.T) { func TestBitsetRanges_Empty(t *testing.T) { t.Parallel() - b := bitset.New(100) + b := roaring.New() blockSize := int64(4096) ranges := slices.Collect(BitsetRanges(b, blockSize)) @@ -299,8 +299,8 @@ func TestBitsetRanges_Empty(t *testing.T) { func TestBitsetRanges_SingleBit(t *testing.T) { t.Parallel() - b := bitset.New(100) - b.Set(5) + b := roaring.New() + b.Add(5) blockSize := int64(4096) ranges := slices.Collect(BitsetRanges(b, blockSize)) @@ -313,12 +313,12 @@ func TestBitsetRanges_SingleBit(t *testing.T) { func TestBitsetRanges_Contiguous(t *testing.T) { t.Parallel() - b := bitset.New(100) + b := roaring.New() // Set bits 2, 3, 4, 5 - b.Set(2) - b.Set(3) - b.Set(4) - b.Set(5) + b.Add(2) + b.Add(3) + b.Add(4) + b.Add(5) blockSize := int64(4096) ranges := slices.Collect(BitsetRanges(b, blockSize)) @@ -331,15 +331,15 @@ func TestBitsetRanges_Contiguous(t *testing.T) { func TestBitsetRanges_MultipleRanges(t *testing.T) { t.Parallel() - b := bitset.New(100) + b := roaring.New() // Set bits 1, 2, 3 (contiguous) - b.Set(1) - b.Set(2) - b.Set(3) + b.Add(1) + b.Add(2) + b.Add(3) // Gap // Set bits 7, 8 (contiguous) - b.Set(7) - b.Set(8) + b.Add(7) + b.Add(8) blockSize := int64(4096) ranges := slices.Collect(BitsetRanges(b, blockSize)) @@ -356,9 +356,9 @@ func TestBitsetRanges_MultipleRanges(t *testing.T) { func TestBitsetRanges_AllSet(t *testing.T) { t.Parallel() - b := bitset.New(10) - for i := range uint(10) { - b.Set(i) + b := roaring.New() + for i := range uint32(10) { + b.Add(i) } blockSize := int64(4096) @@ -372,10 +372,10 @@ func TestBitsetRanges_AllSet(t *testing.T) { func TestBitsetRanges_EndOfBitset(t *testing.T) { t.Parallel() - b := bitset.New(20) + b := roaring.New() // Set bits 15, 16, 17, 18, 19 (at the end) - for i := uint(15); i < 20; i++ { - b.Set(i) + for i := uint32(15); i < 20; i++ { + b.Add(i) } blockSize := int64(4096) @@ -389,12 +389,12 @@ func TestBitsetRanges_EndOfBitset(t *testing.T) { func TestBitsetRanges_Sparse(t *testing.T) { t.Parallel() - b := bitset.New(100) + b := roaring.New() // Set individual bits with gaps - b.Set(0) - b.Set(10) - b.Set(20) - b.Set(30) + b.Add(0) + b.Add(10) + b.Add(20) + b.Add(30) blockSize := int64(4096) ranges := slices.Collect(BitsetRanges(b, blockSize)) diff --git a/packages/orchestrator/pkg/sandbox/block/tracker.go b/packages/orchestrator/pkg/sandbox/block/tracker.go index 7cd21a562b..91c4975e9b 100644 --- a/packages/orchestrator/pkg/sandbox/block/tracker.go +++ b/packages/orchestrator/pkg/sandbox/block/tracker.go @@ -4,14 +4,13 @@ import ( "iter" "sync" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" - "github.com/e2b-dev/infra/packages/shared/pkg/utils" ) type Tracker struct { - b *bitset.BitSet + b *roaring.Bitmap mu sync.RWMutex blockSize int64 @@ -19,8 +18,7 @@ type Tracker struct { func NewTracker(blockSize int64) *Tracker { return &Tracker{ - // The bitset resizes automatically based on the maximum set bit. - b: bitset.New(0), + b: roaring.New(), blockSize: blockSize, } } @@ -29,27 +27,21 @@ func (t *Tracker) Has(off int64) bool { t.mu.RLock() defer t.mu.RUnlock() - return t.b.Test(uint(header.BlockIdx(off, t.blockSize))) + return t.b.Contains(uint32(header.BlockIdx(off, t.blockSize))) } func (t *Tracker) Add(off int64) { t.mu.Lock() defer t.mu.Unlock() - t.b.Set(uint(header.BlockIdx(off, t.blockSize))) + t.b.Add(uint32(header.BlockIdx(off, t.blockSize))) } func (t *Tracker) Reset() { t.mu.Lock() defer t.mu.Unlock() - t.b.ClearAll() -} - -// BitSet returns the bitset. -// This is not safe to use concurrently. -func (t *Tracker) BitSet() *bitset.BitSet { - return t.b + t.b.Clear() } func (t *Tracker) BlockSize() int64 { @@ -70,11 +62,11 @@ func (t *Tracker) Offsets() iter.Seq[int64] { t.mu.RLock() defer t.mu.RUnlock() - return bitsetOffsets(t.b.Clone(), t.BlockSize()) -} + snapshot := t.b.Clone() -func bitsetOffsets(b *bitset.BitSet, blockSize int64) iter.Seq[int64] { - return utils.TransformTo(b.EachSet(), func(idx uint) int64 { - return header.BlockOffset(int64(idx), blockSize) - }) + return func(yield func(int64) bool) { + snapshot.Iterate(func(idx uint32) bool { + return yield(header.BlockOffset(int64(idx), t.blockSize)) + }) + } } diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go index f1e913f2a1..f5c9eb30d0 100644 --- a/packages/orchestrator/pkg/sandbox/fc/client.go +++ b/packages/orchestrator/pkg/sandbox/fc/client.go @@ -5,7 +5,7 @@ import ( "fmt" "runtime" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/firecracker-microvm/firecracker-go-sdk" "github.com/go-openapi/strfmt" @@ -451,8 +451,8 @@ func (c *apiClient) memoryInfo(ctx context.Context, blockSize int64) (*header.Di } return &header.DiffMetadata{ - Dirty: bitset.From(res.Payload.Resident), - Empty: bitset.From(res.Payload.Empty), + Dirty: roaring.FromDense(res.Payload.Resident, false), + Empty: roaring.FromDense(res.Payload.Empty, false), BlockSize: blockSize, }, nil } @@ -468,8 +468,8 @@ func (c *apiClient) dirtyMemory(ctx context.Context, blockSize int64) (*header.D } return &header.DiffMetadata{ - Dirty: bitset.From(res.Payload.Bitmap), - Empty: bitset.New(0), + Dirty: roaring.FromDense(res.Payload.Bitmap, false), + Empty: roaring.New(), BlockSize: blockSize, }, nil } diff --git a/packages/orchestrator/pkg/sandbox/fc/memory.go b/packages/orchestrator/pkg/sandbox/fc/memory.go index 13e35c7e60..6d1cf3eb10 100644 --- a/packages/orchestrator/pkg/sandbox/fc/memory.go +++ b/packages/orchestrator/pkg/sandbox/fc/memory.go @@ -4,7 +4,7 @@ import ( "context" "fmt" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/block" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" @@ -23,7 +23,7 @@ func (p *Process) DirtyMemory(ctx context.Context, blockSize int64) (*header.Dif func (p *Process) ExportMemory( ctx context.Context, - include *bitset.BitSet, + include *roaring.Bitmap, cachePath string, blockSize int64, ) (*block.Cache, error) { diff --git a/packages/orchestrator/pkg/sandbox/rootfs/direct.go b/packages/orchestrator/pkg/sandbox/rootfs/direct.go index 13bc98cb4d..0e61717397 100644 --- a/packages/orchestrator/pkg/sandbox/rootfs/direct.go +++ b/packages/orchestrator/pkg/sandbox/rootfs/direct.go @@ -131,7 +131,7 @@ func (o *DirectProvider) exportToDiff(ctx context.Context, out io.Writer) (*head return nil, fmt.Errorf("error flushing path: %w", err) } - builder := header.NewDiffMetadataBuilder(int64(o.header.Metadata.Size), o.blockSize) + builder := header.NewDiffMetadataBuilder(o.blockSize) f, err := os.Open(o.path) if err != nil { diff --git a/packages/orchestrator/pkg/sandbox/uffd/noop.go b/packages/orchestrator/pkg/sandbox/uffd/noop.go index 8b6ea14ee1..c317313c2b 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/noop.go +++ b/packages/orchestrator/pkg/sandbox/uffd/noop.go @@ -3,7 +3,7 @@ package uffd import ( "context" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/block" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/fc" @@ -38,17 +38,15 @@ func (m *NoopMemory) DiffMetadata(ctx context.Context, f *fc.Process) (*header.D return nil, err } - dirty := diffInfo.Dirty.Difference(diffInfo.Empty) + diffInfo.Dirty.AndNot(diffInfo.Empty) numberOfPages := header.TotalBlocks(m.size, m.blockSize) - empty := bitset.New(uint(numberOfPages)) - empty.FlipRange(0, uint(numberOfPages)) - - empty = empty.Difference(dirty) + empty := roaring.Flip(diffInfo.Dirty, 0, uint64(numberOfPages)) + empty.RemoveRange(uint64(numberOfPages), uint64(1)<<32) return &header.DiffMetadata{ - Dirty: dirty, + Dirty: diffInfo.Dirty, Empty: empty, BlockSize: m.blockSize, }, nil diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go index 3fb3829238..3e84101ad3 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go @@ -4,10 +4,9 @@ import ( "bytes" "context" "fmt" - "slices" "sync" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/testutils" ) @@ -81,15 +80,21 @@ func (h *testHandler) executeWrite(ctx context.Context, op operation) error { return nil } -// Get a bitset of the offsets of the operations for the given mode. func getOperationsOffsets(ops []operation, m operationMode) []uint { - b := bitset.New(0) + b := roaring.New() for _, operation := range ops { if operation.mode&m != 0 { - b.Set(uint(operation.offset)) + b.Add(uint32(operation.offset)) } } - return slices.Collect(b.EachSet()) + result := make([]uint, 0, b.GetCardinality()) + b.Iterate(func(x uint32) bool { + result = append(result, uint(x)) + + return true + }) + + return result } diff --git a/packages/shared/go.mod b/packages/shared/go.mod index fe3d45a061..c72b8dd575 100644 --- a/packages/shared/go.mod +++ b/packages/shared/go.mod @@ -8,13 +8,12 @@ require ( cloud.google.com/go/artifactregistry v1.17.1 cloud.google.com/go/storage v1.59.2 connectrpc.com/connect v1.18.1 - github.com/RoaringBitmap/roaring/v2 v2.16.1 + github.com/RoaringBitmap/roaring/v2 v2.18.0 github.com/aws/aws-sdk-go-v2 v1.41.0 github.com/aws/aws-sdk-go-v2/config v1.32.6 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.74 github.com/aws/aws-sdk-go-v2/service/ecr v1.44.0 github.com/aws/aws-sdk-go-v2/service/s3 v1.79.3 - github.com/bits-and-blooms/bitset v1.24.2 github.com/bsm/redislock v0.9.4 github.com/dchest/uniuri v1.2.0 github.com/gin-gonic/gin v1.12.0 @@ -112,6 +111,7 @@ require ( github.com/axiomhq/hyperloglog v0.2.5 // indirect github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/bits-and-blooms/bitset v1.24.2 // indirect github.com/bits-and-blooms/bloom/v3 v3.7.0 // indirect github.com/bytedance/gopkg v0.1.4 // indirect github.com/bytedance/sonic v1.15.0 // indirect diff --git a/packages/shared/go.sum b/packages/shared/go.sum index 3da766d305..7007ed2784 100644 --- a/packages/shared/go.sum +++ b/packages/shared/go.sum @@ -78,8 +78,8 @@ github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpz github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= -github.com/RoaringBitmap/roaring/v2 v2.16.1 h1:HmKj5kwyUJVvCuaM8BdLSHVlYn3O9hzgSpaog6bSxmY= -github.com/RoaringBitmap/roaring/v2 v2.16.1/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4= +github.com/RoaringBitmap/roaring/v2 v2.18.0 h1:h7sS0VqCkfBMGgcHaudJFB4FE6Td71H6svRB2poRnGY= +github.com/RoaringBitmap/roaring/v2 v2.18.0/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4= github.com/Workiva/go-datastructures v1.1.6 h1:e2eUkTi+YlNRw6YxH2c+DmgXENTKjCofaiVeDIv6e/U= github.com/Workiva/go-datastructures v1.1.6/go.mod h1:1yZL+zfsztete+ePzZz/Zb1/t5BnDuE2Ya2MMGhzP6A= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= diff --git a/packages/shared/pkg/storage/header/mapping.go b/packages/shared/pkg/storage/header/mapping.go index ada40e2b60..eaa9ea0f26 100644 --- a/packages/shared/pkg/storage/header/mapping.go +++ b/packages/shared/pkg/storage/header/mapping.go @@ -4,7 +4,7 @@ import ( "fmt" "os" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/google/uuid" ) @@ -21,46 +21,23 @@ type BuildMap struct { func CreateMapping( buildId *uuid.UUID, - dirty *bitset.BitSet, + dirty *roaring.Bitmap, blockSize int64, ) []BuildMap { var mappings []BuildMap - - var startBlock uint - var blockLength uint var buildStorageOffset uint64 - for blockIdx, e := dirty.NextSet(0); e; blockIdx, e = dirty.NextSet(blockIdx + 1) { - if startBlock+blockLength == blockIdx { - blockLength++ - - continue - } - - if blockLength > 0 { - m := BuildMap{ - Offset: uint64(startBlock) * uint64(blockSize), - BuildId: *buildId, - Length: uint64(blockLength) * uint64(blockSize), - BuildStorageOffset: buildStorageOffset, - } - - mappings = append(mappings, m) - - buildStorageOffset += m.Length - } - - startBlock = blockIdx - blockLength = 1 - } - - if blockLength > 0 { - mappings = append(mappings, BuildMap{ - Offset: uint64(startBlock) * uint64(blockSize), + for start, endExcl := range dirty.Ranges() { + blockLength := int64(endExcl) - int64(start) + m := BuildMap{ + Offset: uint64(BlockOffset(int64(start), blockSize)), BuildId: *buildId, - Length: uint64(blockLength) * uint64(blockSize), + Length: uint64(BlockOffset(blockLength, blockSize)), BuildStorageOffset: buildStorageOffset, - }) + } + + mappings = append(mappings, m) + buildStorageOffset += m.Length } return mappings diff --git a/packages/shared/pkg/storage/header/metadata.go b/packages/shared/pkg/storage/header/metadata.go index 1cec190aaf..86cb41f192 100644 --- a/packages/shared/pkg/storage/header/metadata.go +++ b/packages/shared/pkg/storage/header/metadata.go @@ -5,7 +5,7 @@ import ( "fmt" "io" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" "github.com/google/uuid" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -18,16 +18,16 @@ import ( var ignoreBuildID = uuid.Nil type DiffMetadata struct { - Dirty *bitset.BitSet - Empty *bitset.BitSet + Dirty *roaring.Bitmap + Empty *roaring.Bitmap BlockSize int64 } -func NewDiffMetadata(blockSize int64, dirty *bitset.BitSet) *DiffMetadata { +func NewDiffMetadata(blockSize int64, dirty *roaring.Bitmap) *DiffMetadata { return &DiffMetadata{ Dirty: dirty, - Empty: bitset.New(0), + Empty: roaring.New(), BlockSize: blockSize, } } @@ -87,7 +87,7 @@ func (d *DiffMetadata) ToDiffHeader( telemetry.SetAttributes(ctx, attribute.Int64("snapshot.header.mappings.length", int64(len(m))), - attribute.Int64("snapshot.diff.size", int64(d.Dirty.Count()*uint(originalHeader.Metadata.BlockSize))), + attribute.Int64("snapshot.diff.size", int64(d.Dirty.GetCardinality())*int64(originalHeader.Metadata.BlockSize)), attribute.Int64("snapshot.mapped_size", int64(metadata.Size)), attribute.Int64("snapshot.block_size", int64(metadata.BlockSize)), attribute.Int64("snapshot.metadata.version", int64(metadata.Version)), @@ -114,17 +114,16 @@ func (d *DiffMetadata) ToDiffHeader( } type DiffMetadataBuilder struct { - dirty *bitset.BitSet - empty *bitset.BitSet + dirty *roaring.Bitmap + empty *roaring.Bitmap blockSize int64 } -func NewDiffMetadataBuilder(size, blockSize int64) *DiffMetadataBuilder { +func NewDiffMetadataBuilder(blockSize int64) *DiffMetadataBuilder { return &DiffMetadataBuilder{ - // TODO: We might be able to start with 0 as preallocating here actually takes space. - dirty: bitset.New(uint(TotalBlocks(size, blockSize))), - empty: bitset.New(0), + dirty: roaring.New(), + empty: roaring.New(), blockSize: blockSize, } @@ -138,12 +137,12 @@ func (b *DiffMetadataBuilder) Process(ctx context.Context, block []byte, out io. return fmt.Errorf("error checking empty block: %w", err) } if isEmpty { - b.empty.Set(uint(blockIdx)) + b.empty.Add(uint32(blockIdx)) return nil } - b.dirty.Set(uint(blockIdx)) + b.dirty.Add(uint32(blockIdx)) n, err := out.Write(block) if err != nil { logger.L().Error(ctx, "error writing to out", zap.Error(err)) diff --git a/packages/shared/pkg/atomicbitset/bitset.go b/packages/shared/pkg/syncroaring/bitset.go similarity index 65% rename from packages/shared/pkg/atomicbitset/bitset.go rename to packages/shared/pkg/syncroaring/bitset.go index 6cc3e54d01..f23fb22865 100644 --- a/packages/shared/pkg/atomicbitset/bitset.go +++ b/packages/shared/pkg/syncroaring/bitset.go @@ -1,10 +1,9 @@ -package atomicbitset +package syncroaring import ( "sync" - roaring "github.com/RoaringBitmap/roaring/v2" - "github.com/bits-and-blooms/bitset" + "github.com/RoaringBitmap/roaring/v2" ) type Bitset struct { @@ -13,9 +12,7 @@ type Bitset struct { } func New() *Bitset { - return &Bitset{ - bm: roaring.New(), - } + return &Bitset{bm: roaring.New()} } func (b *Bitset) HasRange(start, end uint64) bool { @@ -32,9 +29,9 @@ func (b *Bitset) SetRange(start, end uint64) { b.bm.AddRange(start, end) } -func (b *Bitset) BitSet() *bitset.BitSet { +func (b *Bitset) Clone() *roaring.Bitmap { b.mu.RLock() defer b.mu.RUnlock() - return b.bm.ToBitSet() + return b.bm.Clone() } diff --git a/packages/shared/pkg/atomicbitset/bitset_test.go b/packages/shared/pkg/syncroaring/bitset_test.go similarity index 98% rename from packages/shared/pkg/atomicbitset/bitset_test.go rename to packages/shared/pkg/syncroaring/bitset_test.go index 441dbd2bea..66f5f6be54 100644 --- a/packages/shared/pkg/atomicbitset/bitset_test.go +++ b/packages/shared/pkg/syncroaring/bitset_test.go @@ -1,10 +1,10 @@ -package atomicbitset +package syncroaring import ( "sync" "testing" - roaring "github.com/RoaringBitmap/roaring/v2" + "github.com/RoaringBitmap/roaring/v2" "github.com/stretchr/testify/require" )