-
Notifications
You must be signed in to change notification settings - Fork 13
fix(worker): mark uploads as error on permanent finisher failure #728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,6 +87,41 @@ class UploadFinisherTask(BaseCodecovTask, name=upload_finisher_task_name): | |
|
|
||
| max_retries = UPLOAD_PROCESSOR_MAX_RETRIES | ||
|
|
||
| def _mark_uploads_as_error(self, db_session, upload_ids: list) -> None: | ||
| """Best-effort: transition uploads to error state so they are not re-processed. | ||
|
|
||
| When the finisher fails permanently (unrecoverable exception, soft time | ||
| limit, or max retries exceeded), uploads stay in "started" state. The | ||
| next upload to the same commit will re-discover them and spawn another | ||
| finisher that fails again, creating a retry loop. Marking them as error | ||
| breaks that cycle. | ||
|
|
||
| The whole operation is wrapped in try/except because the DB session may | ||
| already be in a broken state (e.g. after an OperationalError). | ||
| """ | ||
| if not upload_ids: | ||
| return | ||
| try: | ||
| db_session.rollback() | ||
| db_session.query(Upload).filter(Upload.id_.in_(upload_ids)).update( | ||
| { | ||
| Upload.state: "error", | ||
| Upload.state_id: UploadState.ERROR.db_id, | ||
| }, | ||
| synchronize_session="fetch", | ||
| ) | ||
| db_session.commit() | ||
| log.info( | ||
| "Marked uploads as error after permanent failure", | ||
| extra={"upload_ids": upload_ids}, | ||
| ) | ||
| except Exception: | ||
| log.warning( | ||
| "Failed to mark uploads as error (DB may be unreachable)", | ||
| extra={"upload_ids": upload_ids}, | ||
| exc_info=True, | ||
| ) | ||
|
|
||
| def _find_started_uploads_with_reports( | ||
| self, db_session, commit: Commit | ||
| ) -> set[int]: | ||
|
|
@@ -369,6 +404,7 @@ def run_impl( | |
|
|
||
| except SoftTimeLimitExceeded: | ||
| log.warning("run_impl: soft time limit exceeded") | ||
| self._mark_uploads_as_error(db_session, upload_ids) | ||
| self._call_upload_breadcrumb_task( | ||
| commit_sha=commitid, | ||
| repo_id=repoid, | ||
|
|
@@ -388,6 +424,7 @@ def run_impl( | |
| "Unexpected error in upload finisher", | ||
| extra={"upload_ids": upload_ids}, | ||
| ) | ||
| self._mark_uploads_as_error(db_session, upload_ids) | ||
| self._call_upload_breadcrumb_task( | ||
| commit_sha=commitid, | ||
| repo_id=repoid, | ||
|
|
@@ -484,6 +521,7 @@ def _process_reports_with_lock( | |
| "repoid": repoid, | ||
| }, | ||
| ) | ||
| self._mark_uploads_as_error(db_session, upload_ids) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Committed error state alters caller's control flow unintentionally. Medium Severity. When Additional Locations (1) |
||
| self._call_upload_breadcrumb_task( | ||
| commit_sha=commitid, | ||
| repo_id=repoid, | ||
|
|
@@ -605,6 +643,7 @@ def _handle_finisher_lock( | |
| "repoid": repoid, | ||
| }, | ||
| ) | ||
| self._mark_uploads_as_error(db_session, upload_ids) | ||
| self._call_upload_breadcrumb_task( | ||
| commit_sha=commitid, | ||
| repo_id=repoid, | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing state filter may revert processed uploads to error
High Severity
`_mark_uploads_as_error` updates all uploads matching `upload_ids` regardless of their current state. When `_handle_finisher_lock` hits max lock retries (or a `SoftTimeLimitExceeded`/`Exception` fires during that phase), uploads have already been committed as `"processed"` by the earlier `_process_reports_with_lock` call. The unfiltered update will revert them from `"processed"` to `"error"`, incorrectly discarding successful work. The query needs an additional filter like `Upload.state == "started"` to only affect uploads that are genuinely stuck. Additional Locations (1)
apps/worker/tasks/upload_finisher.py#L645-L646