-
Notifications
You must be signed in to change notification settings - Fork 279
Use a detached timeout for pause completion in API #2238
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
5937c2e
b7dc232
999e201
dfac5c7
5f17d26
5c8f238
023fd3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| "errors" | ||
| "fmt" | ||
| "net/http" | ||
| "time" | ||
|
|
||
| "github.com/gin-gonic/gin" | ||
| "github.com/google/uuid" | ||
|
|
@@ -22,6 +23,23 @@ | |
| "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" | ||
| ) | ||
|
|
||
| const ( | ||
| pauseRequestTimeout = 15 * time.Minute | ||
| pauseRequestWaitTimeout = 60 * time.Second | ||
| ) | ||
|
|
||
| type pauseRequestResult struct { | ||
| statusCode int | ||
| clientMsg string | ||
| } | ||
|
|
||
| func sendPauseInProgressResponse(c *gin.Context) { | ||
| c.JSON(http.StatusAccepted, gin.H{ | ||
| "code": int32(http.StatusAccepted), | ||
| "message": "Pause is still in progress. Check the sandbox info endpoint for the latest status.", | ||
| }) | ||
|
Check failure on line 40 in packages/api/internal/handlers/sandbox_pause.go
|
||
|
Comment on lines
+35
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 The new 202 Accepted response from Extended reasoning...What the bug is and how it manifests The PR introduces The specific code path that triggers it In Why existing code doesn't prevent it The auto-generated server code ( What the impact would be Any caller that checks How to fix it Add a Step-by-step proof
|
||
| } | ||
|
|
||
| func (a *APIStore) PostSandboxesSandboxIDPause(c *gin.Context, sandboxID api.SandboxID) { | ||
| ctx := c.Request.Context() | ||
| // Get team from context, use TeamContextKey | ||
|
|
@@ -44,40 +62,36 @@ | |
|
|
||
| pause.LogInitiated(ctx, sandboxID, teamID.String(), pause.ReasonRequest) | ||
|
|
||
| err = a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause}) | ||
| var transErr *sandbox.InvalidStateTransitionError | ||
| resultCh := make(chan pauseRequestResult, 1) | ||
|
|
||
| switch { | ||
| case err == nil: | ||
| pause.LogSuccess(ctx, sandboxID, teamID.String(), pause.ReasonRequest) | ||
| case errors.Is(err, orchestrator.ErrSandboxNotFound): | ||
| apiErr := pauseHandleNotRunningSandbox(ctx, a.snapshotCache, sandboxID, teamID) | ||
| switch apiErr.Code { | ||
| case http.StatusConflict: | ||
| pause.LogSkipped(ctx, sandboxID, teamID.String(), pause.ReasonRequest, pause.SkipReasonAlreadyPaused) | ||
| case http.StatusNotFound: | ||
| pause.LogSkipped(ctx, sandboxID, teamID.String(), pause.ReasonRequest, pause.SkipReasonNotFound) | ||
| default: | ||
| pause.LogFailure(ctx, sandboxID, teamID.String(), pause.ReasonRequest, err) | ||
| go func() { | ||
| pauseCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), pauseRequestTimeout) | ||
| defer cancel() | ||
|
|
||
| resultCh <- a.pauseSandboxRequest(pauseCtx, sandboxID, teamID) | ||
| close(resultCh) | ||
| }() | ||
|
|
||
| select { | ||
|
Check failure on line 75 in packages/api/internal/handlers/sandbox_pause.go
|
||
matthewlouisbrockman marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| case result := <-resultCh: | ||
| if result.clientMsg == "" { | ||
| c.Status(result.statusCode) | ||
|
|
||
| return | ||
| } | ||
| a.sendAPIStoreError(c, apiErr.Code, apiErr.ClientMsg) | ||
|
|
||
| return | ||
| case errors.As(err, &transErr): | ||
| pause.LogFailure(ctx, sandboxID, teamID.String(), pause.ReasonRequest, err) | ||
| a.sendAPIStoreError(c, http.StatusConflict, fmt.Sprintf("Sandbox '%s' cannot be paused while in '%s' state", sandboxID, transErr.CurrentState)) | ||
| a.sendAPIStoreError(c, result.statusCode, result.clientMsg) | ||
|
|
||
| return | ||
| default: | ||
| pause.LogFailure(ctx, sandboxID, teamID.String(), pause.ReasonRequest, err) | ||
| telemetry.ReportError(ctx, "error pausing sandbox", err) | ||
| case <-time.After(pauseRequestWaitTimeout): | ||
| sendPauseInProgressResponse(c) | ||
|
|
||
| a.sendAPIStoreError(c, http.StatusInternalServerError, "Error pausing sandbox") | ||
| return | ||
| case <-ctx.Done(): | ||
| sendPauseInProgressResponse(c) | ||
|
|
||
| return | ||
| } | ||
|
|
||
| c.Status(http.StatusNoContent) | ||
| } | ||
|
|
||
| func pauseHandleNotRunningSandbox(ctx context.Context, cache *snapshotcache.SnapshotCache, sandboxID string, teamID uuid.UUID) api.APIError { | ||
|
|
@@ -117,3 +131,39 @@ | |
| ClientMsg: "Error pausing sandbox", | ||
| } | ||
| } | ||
|
|
||
| func (a *APIStore) pauseSandboxRequest(ctx context.Context, sandboxID string, teamID uuid.UUID) pauseRequestResult { | ||
| err := a.orchestrator.RemoveSandbox(ctx, teamID, sandboxID, sandbox.RemoveOpts{Action: sandbox.StateActionPause}) | ||
| var transErr *sandbox.InvalidStateTransitionError | ||
|
|
||
| switch { | ||
| case err == nil: | ||
| pause.LogSuccess(ctx, sandboxID, teamID.String(), pause.ReasonRequest) | ||
|
|
||
| return pauseRequestResult{statusCode: http.StatusNoContent} | ||
| case errors.Is(err, orchestrator.ErrSandboxNotFound): | ||
| apiErr := pauseHandleNotRunningSandbox(ctx, a.snapshotCache, sandboxID, teamID) | ||
| switch apiErr.Code { | ||
| case http.StatusConflict: | ||
| pause.LogSkipped(ctx, sandboxID, teamID.String(), pause.ReasonRequest, pause.SkipReasonAlreadyPaused) | ||
| case http.StatusNotFound: | ||
| pause.LogSkipped(ctx, sandboxID, teamID.String(), pause.ReasonRequest, pause.SkipReasonNotFound) | ||
| default: | ||
| pause.LogFailure(ctx, sandboxID, teamID.String(), pause.ReasonRequest, err) | ||
| } | ||
|
|
||
| return pauseRequestResult{statusCode: apiErr.Code, clientMsg: apiErr.ClientMsg} | ||
| case errors.As(err, &transErr): | ||
| pause.LogFailure(ctx, sandboxID, teamID.String(), pause.ReasonRequest, err) | ||
|
|
||
| return pauseRequestResult{ | ||
| statusCode: http.StatusConflict, | ||
| clientMsg: fmt.Sprintf("Sandbox '%s' cannot be paused while in '%s' state", sandboxID, transErr.CurrentState), | ||
| } | ||
| default: | ||
| pause.LogFailure(ctx, sandboxID, teamID.String(), pause.ReasonRequest, err) | ||
| telemetry.ReportError(ctx, "error pausing sandbox", err) | ||
|
|
||
| return pauseRequestResult{statusCode: http.StatusInternalServerError, clientMsg: "Error pausing sandbox"} | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Returning
202 Acceptedhere introduces a new success path that is not represented in the generated API contract/client (PostSandboxesSandboxIDPauseResponseandParsePostSandboxesSandboxIDPauseResponseonly model 401/404/409/500, with callers commonly expecting 204). When pause runs longer than 60s or the request context is canceled, clients generated from the current spec will treat this as an unexpected status and may surface false failures even though the pause continues in the background.Useful? React with 👍 / 👎.