-
-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Builds: delete old build objects #12800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 12 commits
5a3886f
c99a6a4
9fd3444
2633272
a9d312d
f70cc74
b5ff69f
180975b
6034157
428319a
25f014b
66233cd
739af82
a681538
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,29 +1,28 @@ | ||||||||||||||||||||||||
| import json | ||||||||||||||||||||||||
| from io import BytesIO | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| import requests | ||||||||||||||||||||||||
| import structlog | ||||||||||||||||||||||||
| from django.conf import settings | ||||||||||||||||||||||||
| from django.core.cache import cache | ||||||||||||||||||||||||
| from django.urls import reverse | ||||||||||||||||||||||||
| from django.utils import timezone | ||||||||||||||||||||||||
| from django.utils.translation import gettext_lazy as _ | ||||||||||||||||||||||||
| from oauthlib.oauth2.rfc6749.errors import InvalidGrantError | ||||||||||||||||||||||||
| from oauthlib.oauth2.rfc6749.errors import TokenExpiredError | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| from readthedocs import __version__ | ||||||||||||||||||||||||
| from readthedocs.api.v2.serializers import BuildCommandSerializer | ||||||||||||||||||||||||
| from readthedocs.api.v2.utils import delete_versions_from_db | ||||||||||||||||||||||||
| from readthedocs.api.v2.utils import get_deleted_active_versions | ||||||||||||||||||||||||
| from readthedocs.api.v2.utils import run_version_automation_rules | ||||||||||||||||||||||||
| from readthedocs.api.v2.utils import sync_versions_to_db | ||||||||||||||||||||||||
| from readthedocs.builds.constants import BRANCH | ||||||||||||||||||||||||
| from readthedocs.builds.constants import BUILD_FINAL_STATES | ||||||||||||||||||||||||
| from readthedocs.builds.constants import BUILD_STATUS_FAILURE | ||||||||||||||||||||||||
| from readthedocs.builds.constants import BUILD_STATUS_PENDING | ||||||||||||||||||||||||
| from readthedocs.builds.constants import BUILD_STATUS_SUCCESS | ||||||||||||||||||||||||
| from readthedocs.builds.constants import EXTERNAL | ||||||||||||||||||||||||
| from readthedocs.builds.constants import EXTERNAL_VERSION_STATE_CLOSED | ||||||||||||||||||||||||
| from readthedocs.builds.constants import LOCK_EXPIRE | ||||||||||||||||||||||||
| from readthedocs.builds.constants import MAX_BUILD_COMMAND_SIZE | ||||||||||||||||||||||||
| from readthedocs.builds.constants import TAG | ||||||||||||||||||||||||
| from readthedocs.builds.models import Build | ||||||||||||||||||||||||
| from readthedocs.builds.models import BuildConfig | ||||||||||||||||||||||||
|
|
@@ -32,6 +31,7 @@ | |||||||||||||||||||||||
| from readthedocs.builds.utils import memcache_lock | ||||||||||||||||||||||||
| from readthedocs.core.utils import send_email | ||||||||||||||||||||||||
| from readthedocs.core.utils import trigger_build | ||||||||||||||||||||||||
| from readthedocs.core.utils.db import delete_in_batches | ||||||||||||||||||||||||
| from readthedocs.integrations.models import HttpExchange | ||||||||||||||||||||||||
| from readthedocs.notifications.models import Notification | ||||||||||||||||||||||||
| from readthedocs.oauth.notifications import MESSAGE_OAUTH_BUILD_STATUS_FAILURE | ||||||||||||||||||||||||
|
|
@@ -45,7 +45,7 @@ | |||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| @app.task(queue="web", bind=True) | ||||||||||||||||||||||||
| def archive_builds_task(self, days=14, limit=200, delete=False): | ||||||||||||||||||||||||
| def archive_builds_task(self, days=14, limit=200): | ||||||||||||||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| Task to archive old builds to cold storage. | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
@@ -71,33 +71,8 @@ def archive_builds_task(self, days=14, limit=200, delete=False): | |||||||||||||||||||||||
| .prefetch_related("commands") | ||||||||||||||||||||||||
| .only("date", "cold_storage")[:limit] | ||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| for build in queryset: | ||||||||||||||||||||||||
| commands = BuildCommandSerializer(build.commands, many=True).data | ||||||||||||||||||||||||
| if commands: | ||||||||||||||||||||||||
| for cmd in commands: | ||||||||||||||||||||||||
| if len(cmd["output"]) > MAX_BUILD_COMMAND_SIZE: | ||||||||||||||||||||||||
| cmd["output"] = cmd["output"][-MAX_BUILD_COMMAND_SIZE:] | ||||||||||||||||||||||||
| cmd["output"] = ( | ||||||||||||||||||||||||
| "\n\n" | ||||||||||||||||||||||||
| "... (truncated) ..." | ||||||||||||||||||||||||
| "\n\n" | ||||||||||||||||||||||||
| "Command output too long. Truncated to last 1MB." | ||||||||||||||||||||||||
| "\n\n" + cmd["output"] | ||||||||||||||||||||||||
| ) # noqa | ||||||||||||||||||||||||
| log.debug("Truncating build command for build.", build_id=build.id) | ||||||||||||||||||||||||
| output = BytesIO(json.dumps(commands).encode("utf8")) | ||||||||||||||||||||||||
| filename = "{date}/{id}.json".format(date=str(build.date.date()), id=build.id) | ||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| build_commands_storage.save(name=filename, content=output) | ||||||||||||||||||||||||
| if delete: | ||||||||||||||||||||||||
| build.commands.all().delete() | ||||||||||||||||||||||||
| except IOError: | ||||||||||||||||||||||||
| log.exception("Cold Storage save failure") | ||||||||||||||||||||||||
| continue | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| build.cold_storage = True | ||||||||||||||||||||||||
| build.save() | ||||||||||||||||||||||||
| build.move_to_cold_storage() | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| @app.task(queue="web") | ||||||||||||||||||||||||
|
|
@@ -606,3 +581,86 @@ def remove_orphan_build_config(): | |||||||||||||||||||||||
| count = orphan_buildconfigs.count() | ||||||||||||||||||||||||
| orphan_buildconfigs.delete() | ||||||||||||||||||||||||
| log.info("Removed orphan BuildConfig objects.", count=count) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| @app.task(queue="web") | ||||||||||||||||||||||||
| def delete_old_build_objects( | ||||||||||||||||||||||||
| days=360 * 3, keep_recent=250, limit=20_000, max_projects=5_000, start=None | ||||||||||||||||||||||||
| ): | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| Delete old Build objects that are not needed anymore. | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| We keep the most recent `keep_recent` builds per version, | ||||||||||||||||||||||||
| and delete builds that are older than `days` days. | ||||||||||||||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| :param limit: Maximum number of builds to delete in one execution. | ||||||||||||||||||||||||
| Keep our DB from being overwhelmed by deleting too many builds at once. | ||||||||||||||||||||||||
| :param max_projects: Maximum number of projects to process in one execution. | ||||||||||||||||||||||||
| Avoids iterating over all projects in one execution. | ||||||||||||||||||||||||
| :param start: Starting index for processing projects. | ||||||||||||||||||||||||
| Normally, this value comes from the cache, and is updated after each execution to continue from where it left off. | ||||||||||||||||||||||||
| But it can also be set to a specific value for manual execution. | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| cache_key = "rtd-task:delete_old_build_objects_start" | ||||||||||||||||||||||||
| max_count = Project.objects.count() | ||||||||||||||||||||||||
| use_cache = start is None | ||||||||||||||||||||||||
| if use_cache: | ||||||||||||||||||||||||
| start = cache.get(cache_key, 0) | ||||||||||||||||||||||||
| if start >= max_count: | ||||||||||||||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||
| start = 0 | ||||||||||||||||||||||||
| end = start + max_projects | ||||||||||||||||||||||||
| if use_cache: | ||||||||||||||||||||||||
| cache.set(cache_key, end) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| cutoff_date = timezone.now() - timezone.timedelta(days=days) | ||||||||||||||||||||||||
| projects = Project.objects.all().order_by("pub_date").only("pk")[start:end] | ||||||||||||||||||||||||
| for project in projects: | ||||||||||||||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||
| # Delete builds associated with versions, keeping | ||||||||||||||||||||||||
| # the most recent `keep_recent` builds per version. | ||||||||||||||||||||||||
| for version in project.versions.exclude(builds=None).only("pk").iterator(): | ||||||||||||||||||||||||
| builds_to_delete = version.builds.filter( | ||||||||||||||||||||||||
| state__in=BUILD_FINAL_STATES, | ||||||||||||||||||||||||
| date__lt=cutoff_date, | ||||||||||||||||||||||||
| ).order_by("-date") | ||||||||||||||||||||||||
| limit -= _delete_builds(builds_to_delete, start=keep_recent, end=keep_recent + limit) | ||||||||||||||||||||||||
| if limit <= 0: | ||||||||||||||||||||||||
|
Comment on lines
+621
to
+626
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are not taking the slice here? I looks weird that we are adding all the builds to
Suggested change
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer if we perform the slice here since it will be clearer and avoid confusions.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deleting in batches doesn't work over a sliced queryset. It's explained in the PR description.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, but this is pretty confusing to me. We are calling
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, we are calling _delete_builds(queryset, start, end), that seems explicit? |
||||||||||||||||||||||||
| return | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| # Delete builds that are not associated with any version, | ||||||||||||||||||||||||
| # keeping the most recent `keep_recent` builds per project. | ||||||||||||||||||||||||
| builds_to_delete = project.builds.filter( | ||||||||||||||||||||||||
| version=None, state__in=BUILD_FINAL_STATES, date__lt=cutoff_date | ||||||||||||||||||||||||
| ).order_by("-date") | ||||||||||||||||||||||||
| limit -= _delete_builds(builds_to_delete, start=keep_recent, end=keep_recent + limit) | ||||||||||||||||||||||||
| if limit <= 0: | ||||||||||||||||||||||||
| return | ||||||||||||||||||||||||
|
Comment on lines
+629
to
+636
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the builds are not associated to any version, this is because the version was deleted. We shouldn't keep only
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth it complicating this more? If the version is deleted, the build is never referenced by any version. We could keep 2x or 3x the builds here if we want to keep more builds, but honestly I think we are fine here.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What are we saving this builds for if they are not reference by any version?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So their builds page wouldn't immediately 404, and users had a history of previous builds. I'll be fine deleting builds when a version is deleted.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I think we either want:
but I wouldn't create another logic for this particular use case.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's simpler and faster to apply a global limit than grouping the query for versions and then delete builds for each one of them... |
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| def _delete_builds(builds, start: int, end: int) -> int: | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| Delete builds from the queryset, starting from `start` index and ending at `end` index. | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| This also deletes the storage paths associated with the builds if they are in cold storage. | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| :returns: The number of builds deleted. | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| # NOTE: we can't use a filter over an sliced queryset. | ||||||||||||||||||||||||
| paths_to_delete = [] | ||||||||||||||||||||||||
| for build in builds[start:end]: | ||||||||||||||||||||||||
| if build.cold_storage: | ||||||||||||||||||||||||
| paths_to_delete.append(build.storage_path) | ||||||||||||||||||||||||
stsewd marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| _, deleted = delete_in_batches(builds, start=start, end=end) | ||||||||||||||||||||||||
| remove_build_commands_storage_paths(paths_to_delete) | ||||||||||||||||||||||||
| return deleted.get("builds.Build", 0) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| @app.task(queue="web") | ||||||||||||||||||||||||
| def remove_build_commands_storage_paths(paths): | ||||||||||||||||||||||||
| """Remove the build commands from storage for the given paths.""" | ||||||||||||||||||||||||
| log.info("Removing paths from build commands storage.", paths=paths) | ||||||||||||||||||||||||
| for path in paths: | ||||||||||||||||||||||||
| try: | ||||||||||||||||||||||||
| build_commands_storage.delete(path) | ||||||||||||||||||||||||
stsewd marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||||||||||||
| except Exception: | ||||||||||||||||||||||||
| log.info("Failed to delete build commands from storage.", path=path, exc_info=True) | ||||||||||||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.