fix(pushsync): prevent silent chunk loss on shallow receipts #5390
```diff
@@ -299,6 +299,14 @@ func (ps *PushSync) handler(ctx context.Context, p p2p.Peer, stream p2p.Stream)
 	switch receipt, err := ps.pushToClosest(ctx, chunk, false); {
 	case errors.Is(err, topology.ErrWantSelf):
+		// Only store if we are actually within our neighborhood. If the chunk
+		// is outside our AOR we would store it in a low bin and unreserve()
+		// would evict it almost immediately, leaving the chunk nowhere on the
+		// network while the origin believes it was delivered.
+		if swarm.Proximity(ps.address.Bytes(), chunkAddress.Bytes()) < rad {
```
**Contributor:** What happens if you have doubling set on and the chunk is saved into a sister neighborhood? Should we have a check here based on the committed depth?
```diff
+			ps.metrics.OutOfDepthStoring.Inc()
+			return ErrOutOfDepthStoring
+		}
 		stored, reason = true, "want self"
 		return store(ctx)
 	case err == nil:
```
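For context on the guard above: `swarm.Proximity` returns the proximity order of two addresses, i.e. the number of leading bits they share, and the chunk is stored only when that order reaches the storage radius `rad`. A minimal standalone sketch of the idea, with `proximity` re-implemented locally and the radius value assumed purely for illustration:

```go
package main

import "fmt"

// proximity returns the number of leading bits two equal-length
// addresses share, i.e. their proximity order.
func proximity(a, b []byte) int {
	po := 0
	for i := range a {
		x := a[i] ^ b[i]
		if x == 0 {
			po += 8
			continue
		}
		for x&0x80 == 0 { // count leading zero bits of the differing byte
			po++
			x <<= 1
		}
		return po
	}
	return po
}

func main() {
	node := []byte{0b1010_0000}  // our overlay address (1 byte for brevity)
	chunk := []byte{0b1011_0000} // chunk address
	const rad = 4                // assumed storage radius, illustration only

	po := proximity(node, chunk)
	// po == 3 < rad: the chunk falls outside the reserve, so storing it
	// locally would only end in near-immediate eviction by unreserve().
	fmt.Printf("po=%d withinDepth=%v\n", po, po >= rad)
}
```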
```diff
@@ -361,10 +369,11 @@ func (ps *PushSync) pushToClosest(ctx context.Context, ch swarm.Chunk, origin bo
 	ps.metrics.TotalRequests.Inc()

 	var (
-		sentErrorsLeft   = 1
-		preemptiveTicker <-chan time.Time
-		inflight         int
-		parallelForwards = maxMultiplexForwards
+		sentErrorsLeft       = 1
+		preemptiveTicker     <-chan time.Time
+		inflight             int
+		parallelForwards     = maxMultiplexForwards
+		shallowReceiptResult *pb.Receipt
 	)

 	if origin {
```
```diff
@@ -419,11 +428,19 @@ func (ps *PushSync) pushToClosest(ctx context.Context, ch swarm.Chunk, origin bo
 			if skip.PruneExpiresAfter(idAddress, overDraftRefresh) == 0 { // no overdraft peers, we have depleted ALL peers
 				if inflight == 0 {
 					if ps.fullNode {
+						// If a peer already has the chunk (even at wrong depth), don't
+						// store locally — the chunk is closer to its neighbourhood than us.
+						if shallowReceiptResult != nil {
+							return shallowReceiptResult, ErrShallowReceipt
+						}
 						if cac.Valid(ch) {
 							go ps.unwrap(ch)
 						}
 						return nil, topology.ErrWantSelf
 					}
+					if shallowReceiptResult != nil {
+						return shallowReceiptResult, ErrShallowReceipt
+					}
 					ps.logger.Debug("no peers left", "chunk_address", ch.Address(), "error", err)
 					return nil, err
 				}
```
```diff
@@ -486,12 +503,17 @@ func (ps *PushSync) pushToClosest(ctx context.Context, ch swarm.Chunk, origin bo
 			return result.receipt, nil
 		}

-		switch err := ps.checkReceipt(result.receipt); {
-		case err == nil:
+		if err := ps.checkReceipt(result.receipt); err == nil {
 			return result.receipt, nil
-		case errors.Is(err, ErrShallowReceipt):
-			ps.errSkip.Add(idAddress, result.peer, skiplistDur)
-			return result.receipt, err
+		} else if errors.Is(err, ErrShallowReceipt) {
+			// Treat shallow receipt like any other failure: exhaust the full
+			// error budget and wait for any other inflight parallel pushes
+			// (e.g. multiplex forwards) before giving up. Only return
+			// ErrShallowReceipt once the entire budget is spent.
+			shallowReceiptResult = result.receipt
```
**Contributor:** You've removed the skiplist addition line that was on the left side of the diff (L493). Any particular reason?

**Member (Author):** It is not actually removed; it is now on the common failure path at pushsync.go:524.
```diff
+			result.err = err
```
**Contributor:** Not sure if I understand this fully, but this statement and the next one in the […]
```diff
+		} else {
+			result.err = err
+		}
 	}
```
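Stripped of bee's types, the change above is a "best fallback" pattern: remember the shallow receipt, keep spending the error budget on other peers, and only surface the shallow receipt once every attempt is exhausted. A self-contained sketch of that control flow (names such as `push` and `budget` are illustrative, not bee's API):

```go
package main

import (
	"errors"
	"fmt"
)

var errShallow = errors.New("shallow receipt")

type receipt struct{ peer string }

// push simulates one forwarding attempt; an illustrative stand-in.
func push(attempt int) (*receipt, error) {
	if attempt == 0 {
		return &receipt{peer: "near-peer"}, errShallow // receipt, but too shallow
	}
	return nil, errors.New("peer unreachable")
}

func pushToClosest(budget int) (*receipt, error) {
	var shallowResult *receipt
	for attempt := 0; attempt < budget; attempt++ {
		r, err := push(attempt)
		if err == nil {
			return r, nil
		}
		if errors.Is(err, errShallow) {
			// Remember the receipt but keep trying: a deeper receipt from a
			// later attempt is preferred over failing fast here.
			shallowResult = r
		}
	}
	if shallowResult != nil {
		// Budget exhausted: a shallow receipt still proves some peer stored
		// the chunk, which beats reporting an outright failure.
		return shallowResult, errShallow
	}
	return nil, errors.New("no push")
}

func main() {
	r, err := pushToClosest(3)
	fmt.Println(r.peer, err) // near-peer shallow receipt
}
```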
|
|
```diff
@@ -505,6 +527,9 @@ func (ps *PushSync) pushToClosest(ctx context.Context, ch swarm.Chunk, origin bo
 		}
 	}

+	if shallowReceiptResult != nil {
+		return shallowReceiptResult, ErrShallowReceipt
+	}
 	return nil, ErrNoPush
 }
```
**Comment:** Changing to `ChunkCouldNotSync` will cause some false reporting, because the `ChunkCouldNotSync` state is not checked and updated in the `Report` function in `uploadstore.go`. You would probably also need to include `ChunkCouldNotSync` in that switch statement.
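To make the suggestion concrete, here is a sketch of the kind of switch the comment refers to. It uses local stand-ins rather than bee's actual `storage.ChunkState` values or the real `Report` implementation in `uploadstore.go`; the point is only that an unhandled state falls through and gets misreported:

```go
package main

import "fmt"

// Local stand-ins for chunk sync states (illustrative only).
type chunkState int

const (
	chunkSent chunkState = iota
	chunkStored
	chunkSynced
	chunkCouldNotSync
)

// report sketches the switch the comment refers to: every state the
// caller can emit must be handled explicitly, or the result is misleading.
func report(state chunkState) error {
	switch state {
	case chunkSent:
		return nil // chunk handed to a peer, keep the upload item
	case chunkStored, chunkSynced:
		return nil // sync complete, upload item can be cleaned up
	case chunkCouldNotSync:
		// The suggested addition: surface the failure explicitly instead
		// of letting it fall into the default branch.
		return fmt.Errorf("chunk could not be synced")
	default:
		return fmt.Errorf("unknown chunk state: %d", state)
	}
}

func main() {
	fmt.Println(report(chunkCouldNotSync))
}
```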