# File: src/backend/core/management/commands/clean_document.py
# Changelog entry: ✨(backend) add management command to reset a Document #1882
"""Clean a document by resetting it (keeping its title) and deleting all descendants."""

import logging

from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.db.models import Q

from botocore.exceptions import BotoCoreError, ClientError

from core.choices import LinkReachChoices, LinkRoleChoices, RoleChoices
from core.models import Document, DocumentAccess, Invitation, Thread

logger = logging.getLogger("impress.commands.clean_document")


class Command(BaseCommand):
    """Reset a document (keeping its title) and delete all its descendants."""

    help = __doc__

    def add_arguments(self, parser):
        """Define command arguments.

        Positional: ``document_id``. Optional: ``-f/--force``, ``-t/--title``,
        ``--link_reach`` and ``--link_role``.
        """
        parser.add_argument(
            "document_id",
            type=str,
            help="UUID of the document to clean",
        )
        parser.add_argument(
            "-f",
            "--force",
            action="store_true",
            default=False,
            help="Force command execution despite DEBUG is set to False",
        )
        parser.add_argument(
            "-t",
            "--title",
            type=str,
            default=None,
            help="Update the document title to this value",
        )
        parser.add_argument(
            "--link_reach",
            type=str,
            default=LinkReachChoices.RESTRICTED,
            choices=LinkReachChoices,
            help="Update the link_reach to this value",
        )
        parser.add_argument(
            "--link_role",
            type=str,
            default=LinkRoleChoices.READER,
            choices=LinkRoleChoices,
            help="update the link_role to this value",
        )

    def handle(self, *args, **options):
        """Execute the clean_document command.

        Refuses to run when ``settings.DEBUG`` is false unless ``--force`` is
        given. Otherwise the root document is reset and its descendants are
        deleted inside a single database transaction, after which the related
        objects are removed from object storage.

        Raises:
            CommandError: if the safety check fails or the document cannot be
                looked up.
        """
        if not settings.DEBUG and not options["force"]:
            raise CommandError(
                "This command is not meant to be used in production environment "
                "except you know what you are doing, if so use --force parameter"
            )

        document_id = options["document_id"]

        try:
            document = Document.objects.get(pk=document_id)
        except (Document.DoesNotExist, ValidationError, ValueError) as err:
            # Django's UUIDField reports a malformed identifier as a
            # ValidationError rather than a ValueError; without catching it a
            # typo in the id would surface as a traceback.
            raise CommandError(f"Document {document_id} does not exist.") from err

        descendants = list(document.get_descendants())
        descendant_ids = [doc.id for doc in descendants]
        all_documents = [document, *descendants]

        # Collect all attachment keys before the transaction clears them
        all_attachment_keys = []
        for doc in all_documents:
            all_attachment_keys.extend(doc.attachments)

        self.stdout.write(
            f"Cleaning document {document_id} and deleting "
            f"{len(descendants)} descendant(s)..."
        )

        with transaction.atomic():
            self._reset_root(document, options)

            # Delete all descendants (cascades accesses and invitations)
            if descendants:
                deleted_count, _ = Document.objects.filter(
                    id__in=descendant_ids
                ).delete()
                self.stdout.write(f"Deleted {deleted_count} descendant(s).")

        # Delete S3 content outside the transaction (S3 is not transactional)
        self._purge_storage(all_documents, all_attachment_keys)
        self.stdout.write("Done.")

    def _reset_root(self, document, options):
        """Remove non-owner accesses, invitations and threads, then reset fields.

        Issues several independent queries and does not open a transaction
        itself, so the caller is expected to wrap it in one.
        """
        access_count, _ = DocumentAccess.objects.filter(
            Q(document_id=document.id) & ~Q(role=RoleChoices.OWNER)
        ).delete()
        self.stdout.write(f"Deleted {access_count} access(es) on root document.")

        invitation_count, _ = Invitation.objects.filter(
            document_id=document.id
        ).delete()
        self.stdout.write(
            f"Deleted {invitation_count} invitation(s) on root document."
        )

        thread_count, _ = Thread.objects.filter(document_id=document.id).delete()
        self.stdout.write(f"Deleted {thread_count} thread(s) on root document.")

        # Reset root document fields; the title is only touched on request.
        update_fields = {
            "excerpt": None,
            "link_reach": options["link_reach"],
            "link_role": options["link_role"],
            "attachments": [],
        }
        if options["title"] is not None:
            update_fields["title"] = options["title"]
        Document.objects.filter(id=document.id).update(**update_fields)

        if options["title"] is not None:
            self.stdout.write(
                f'Reset fields on root document (title set to "{options["title"]}").'
            )
        else:
            self.stdout.write("Reset fields on root document (title kept).")

    def _purge_storage(self, documents, attachment_keys):
        """Best-effort removal of document files and attachments from S3.

        Each failure is logged and skipped. The database changes are already
        committed at this point, so stopping at the first error would leave
        the remaining objects behind with no record of their keys.
        """
        s3_client = default_storage.connection.meta.client
        bucket = default_storage.bucket_name

        deleted_files = 0
        for doc in documents:
            try:
                s3_client.delete_object(Bucket=bucket, Key=doc.file_key)
            except (BotoCoreError, ClientError):
                logger.warning(
                    "Failed to delete S3 file for document %s", doc.id, exc_info=True
                )
            else:
                deleted_files += 1

        # Report what was actually removed, not what was attempted.
        self.stdout.write(f"Deleted S3 content for {deleted_files} document(s).")

        deleted_attachments = 0
        for key in attachment_keys:
            try:
                s3_client.delete_object(Bucket=bucket, Key=key)
            except (BotoCoreError, ClientError):
                logger.warning("Failed to delete S3 attachment %s", key, exc_info=True)
            else:
                deleted_attachments += 1

        self.stdout.write(f"Deleted {deleted_attachments} attachment(s) from S3.")
# File: src/backend/core/tests/commands/test_clean_document.py
"""Unit tests for the `clean_document` management command."""

import random
from unittest import mock
from uuid import uuid4

from django.core.management import CommandError, call_command

import pytest
from botocore.exceptions import ClientError

from core import choices, factories, models
from core.choices import LinkReachChoices, LinkRoleChoices

pytestmark = pytest.mark.django_db

# Dotted path of the storage object as seen from the command module.
STORAGE_PATH = "core.management.commands.clean_document.default_storage"


def _populate_document(document):
    """Attach 5 non-privileged accesses, 1 owner, 1 invitation and 5 threads."""
    factories.UserDocumentAccessFactory.create_batch(
        5,
        document=document,
        role=random.choice(
            [
                role
                for role in choices.RoleChoices
                if role not in choices.PRIVILEGED_ROLES
            ],
        ),
    )
    # One owner role
    factories.UserDocumentAccessFactory(
        document=document, role=choices.RoleChoices.OWNER
    )
    factories.InvitationFactory(document=document)
    factories.ThreadFactory.create_batch(5, document=document)


def _build_tree(root_link_role):
    """Create a populated root -> child -> grandchild tree and return the three."""
    root = factories.DocumentFactory(
        title="Root",
        link_reach=LinkReachChoices.PUBLIC,
        link_role=root_link_role,
    )
    child = factories.DocumentFactory(
        parent=root,
        title="Child",
        link_reach=LinkReachChoices.AUTHENTICATED,
        link_role=LinkRoleChoices.EDITOR,
    )
    grandchild = factories.DocumentFactory(
        parent=child,
        title="Grandchild",
    )

    _populate_document(root)
    factories.UserDocumentAccessFactory(document=child)
    factories.InvitationFactory(document=child)

    # Make sure there is something for the command to remove.
    assert models.Invitation.objects.filter(document=root).exists()
    assert models.Thread.objects.filter(document=root).exists()
    assert models.DocumentAccess.objects.filter(document=root).exists()

    return root, child, grandchild


def _assert_fields_reset(
    document,
    title,
    link_reach=LinkReachChoices.RESTRICTED,
    link_role=LinkRoleChoices.READER,
):
    """Reload the document and check its title and reset fields."""
    document.refresh_from_db()
    assert document.title == title
    assert document.excerpt is None
    assert document.link_reach == link_reach
    assert document.link_role == link_role
    assert document.attachments == []


def _assert_only_owner_left(document):
    """Only the owner access survives; invitations and threads are gone."""
    keeping_accesses = list(models.DocumentAccess.objects.filter(document=document))
    assert len(keeping_accesses) == 1
    assert keeping_accesses[0].role == choices.RoleChoices.OWNER
    assert not models.Invitation.objects.filter(document=document).exists()
    assert not models.Thread.objects.filter(document=document).exists()


def _assert_descendants_deleted(root, child, grandchild):
    """The descendants no longer exist and the root has none left."""
    assert not models.Document.objects.filter(
        id__in=[child.id, grandchild.id]
    ).exists()

    root.refresh_from_db()
    assert root.get_descendants().count() == 0


def test_clean_document_with_descendants(settings):
    """The command should reset the root (keeping title) and delete descendants."""
    settings.DEBUG = True

    root, child, grandchild = _build_tree(LinkRoleChoices.EDITOR)

    with mock.patch(STORAGE_PATH) as mock_storage:
        call_command("clean_document", str(root.id), "--force")

    # Root document should still exist with title kept and other fields reset
    _assert_fields_reset(root, "Root")
    _assert_only_owner_left(root)
    _assert_descendants_deleted(root, child, grandchild)

    # S3 delete should have been called for document files + attachments
    delete_calls = mock_storage.connection.meta.client.delete_object.call_args_list
    assert len(delete_calls) == 3


def test_clean_document_invalid_uuid(settings):
    """The command should raise an error for a non-existent document."""
    settings.DEBUG = True

    # A well-formed UUID that matches no document.
    fake_id = str(uuid4())
    with pytest.raises(CommandError, match=f"Document {fake_id} does not exist."):
        call_command("clean_document", fake_id, "--force")


def test_clean_document_no_force_in_production(settings):
    """The command should require --force when DEBUG is False."""
    settings.DEBUG = False

    doc = factories.DocumentFactory()
    with pytest.raises(CommandError, match="not meant to be used in production"):
        call_command("clean_document", str(doc.id))


def test_clean_document_single_document(settings):
    """The command should work on a single document without children."""
    settings.DEBUG = True

    doc = factories.DocumentFactory(
        title="Single",
        link_reach=LinkReachChoices.PUBLIC,
        link_role=LinkRoleChoices.EDITOR,
    )
    _populate_document(doc)

    with mock.patch(STORAGE_PATH) as mock_storage:
        call_command("clean_document", str(doc.id), "--force")

    _assert_only_owner_left(doc)
    _assert_fields_reset(doc, "Single")

    mock_storage.connection.meta.client.delete_object.assert_called_once()


def test_clean_document_with_title_option(settings):
    """The --title option should update the document title."""
    settings.DEBUG = True

    doc = factories.DocumentFactory(
        title="Old Title",
        link_reach=LinkReachChoices.PUBLIC,
        link_role=LinkRoleChoices.EDITOR,
    )

    with mock.patch(STORAGE_PATH):
        call_command("clean_document", str(doc.id), "--force", "--title", "New Title")

    _assert_fields_reset(doc, "New Title")


def test_clean_document_deletes_attachments_from_s3(settings):
    """The command should delete attachment files from S3."""
    settings.DEBUG = True

    root = factories.DocumentFactory(
        attachments=["root-id/attachments/file1.png", "root-id/attachments/file2.pdf"],
    )
    child = factories.DocumentFactory(
        parent=root,
        attachments=["child-id/attachments/file3.png"],
    )

    with mock.patch(STORAGE_PATH) as mock_storage:
        call_command("clean_document", str(root.id), "--force")

    delete_calls = mock_storage.connection.meta.client.delete_object.call_args_list
    deleted_keys = [call.kwargs["Key"] for call in delete_calls]

    # Document files (root + child)
    assert root.file_key in deleted_keys
    assert child.file_key in deleted_keys

    # Attachment files
    assert "root-id/attachments/file1.png" in deleted_keys
    assert "root-id/attachments/file2.pdf" in deleted_keys
    assert "child-id/attachments/file3.png" in deleted_keys

    assert len(delete_calls) == 5


def test_clean_document_s3_errors_do_not_stop_command(settings):
    """S3 deletion errors should be logged but not stop the command."""
    settings.DEBUG = True

    doc = factories.DocumentFactory(
        attachments=["doc-id/attachments/file1.png"],
    )

    with mock.patch(STORAGE_PATH) as mock_storage:
        delete_object = mock_storage.connection.meta.client.delete_object
        delete_object.side_effect = ClientError(
            {"Error": {"Code": "500", "Message": "Internal Error"}},
            "DeleteObject",
        )
        # Command should complete without raising
        call_command("clean_document", str(doc.id), "--force")

    # Every deletion was still attempted (document file + one attachment)...
    assert delete_object.call_count == 2

    # ...and the database reset happened despite the storage failures.
    doc.refresh_from_db()
    assert doc.attachments == []


def test_clean_document_with_options(settings):
    """Run the command using optional argument link_reach and link_role."""
    settings.DEBUG = True

    root, child, grandchild = _build_tree(LinkRoleChoices.READER)

    with mock.patch(STORAGE_PATH) as mock_storage:
        call_command(
            "clean_document",
            str(root.id),
            "--force",
            "--link_reach",
            "public",
            "--link_role",
            "editor",
        )

    # Root document should still exist with title kept and requested link values
    _assert_fields_reset(
        root,
        "Root",
        link_reach=LinkReachChoices.PUBLIC,
        link_role=LinkRoleChoices.EDITOR,
    )
    _assert_only_owner_left(root)
    _assert_descendants_deleted(root, child, grandchild)

    # S3 delete should have been called for document files + attachments
    delete_calls = mock_storage.connection.meta.client.delete_object.call_args_list
    assert len(delete_calls) == 3