diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 704402409d4..0a7eb0bcdfe 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -611,20 +611,32 @@ def reconfigure_duplicate_cluster(original, cluster_outside): cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) -def prepare_duplicates_for_delete(obj): +def prepare_duplicates_for_delete(obj, *, preview_only=False): """ Prepare duplicate clusters before deleting a Test, Engagement, Product, or Product_Type. Resets inside-scope duplicate FKs and reconfigures outside-scope clusters so that cascade_delete won't hit FK violations on the self-referential duplicate_finding field. + + When preview_only=True, no data is modified. Returns the count of outside-scope + findings that would be deleted (non-zero only when DUPLICATE_CLUSTER_CASCADE_DELETE=True). """ from dojo.utils import FINDING_SCOPE_FILTERS # noqa: PLC0415 circular import scope_field = FINDING_SCOPE_FILTERS.get(type(obj)) if scope_field is None: - logger.warning("prepare_duplicates_for_delete: unsupported object type %s", type(obj).__name__) - return + if not preview_only: + logger.warning("prepare_duplicates_for_delete: unsupported object type %s", type(obj).__name__) + return 0 if preview_only else None + + if preview_only: + if not settings.DUPLICATE_CLUSTER_CASCADE_DELETE: + return 0 + scope_ids_subquery = Finding.objects.filter(**{scope_field: obj}).values_list("id", flat=True) + return Finding.objects.filter( + duplicate_finding_id__in=scope_ids_subquery, + ).exclude(id__in=scope_ids_subquery).count() logger.debug("prepare_duplicates_for_delete: %s %d", type(obj).__name__, obj.id) @@ -637,7 +649,7 @@ def prepare_duplicates_for_delete(obj): if not scope_ids_subquery.exists(): logger.debug("no findings in scope, nothing to prepare") - return + return None # Bulk-reset inside-scope duplicates: single UPDATE instead of per-original mass_model_updater. # Clears the duplicate_finding FK so cascade_delete won't trip over dangling self-references. @@ -694,6 +706,8 @@ def prepare_duplicates_for_delete(obj): outside_orphan_count, ) + return None + @receiver(pre_delete, sender=Test) def test_pre_delete(sender, instance, **kwargs): @@ -830,13 +844,15 @@ def _bulk_delete_findings_internal(finding_qs, chunk_size=1000, *, order_desc=Fa ) -def bulk_delete_findings(finding_qs, chunk_size=1000, cascade_root=None, *, order_desc=False): +def bulk_delete_findings(finding_qs, chunk_size=1000, cascade_root=None, *, order_desc=False, preview_only=False): """ Entry point; may delegate to Pro via settings.BULK_DELETE_FINDINGS_METHOD. cascade_root: optional dict describing the top-level object whose cascade triggered this bulk delete (e.g. {"model": "dojo.engagement", "pk": 9}). Ignored by OSS when no custom method is configured. + + preview_only: when True, return a ``{product_id: finding_count}`` dict without deleting anything. """ from dojo.utils import get_custom_method # noqa: PLC0415 circular import @@ -846,7 +862,10 @@ def bulk_delete_findings(finding_qs, chunk_size=1000, cascade_root=None, *, orde chunk_size=chunk_size, cascade_root=cascade_root, order_desc=order_desc, + preview_only=preview_only, ) + if preview_only: + return None return _bulk_delete_findings_internal(finding_qs, chunk_size=chunk_size, order_desc=order_desc) diff --git a/dojo/management/commands/import_all_unittest_scans.py b/dojo/management/commands/import_all_unittest_scans.py index cc4dd266250..6f5f5040294 100644 --- a/dojo/management/commands/import_all_unittest_scans.py +++ b/dojo/management/commands/import_all_unittest_scans.py @@ -7,6 +7,7 @@ from inspect import isclass from pathlib import Path +from django.conf import settings from django.core.management.base import BaseCommand from django.urls import reverse from django.utils import timezone @@ -43,6 +44,7 @@ def add_arguments(self, parser): parser.add_argument("--engagements-per-product", type=int, default=50, help="Number of engagements per product before a new product is created, defaults to 50") parser.add_argument("--products-per-product-type", type=int, default=15, help="Number of products per product type before a new product type is created, defaults to 15") parser.add_argument("--number-of-runs", type=int, default=1, help="Number of times to run the import of all sample scans, defaults to 1") + parser.add_argument("--background-import", action="store_true", default=False, help="Use async/background imports (Pro feature, default: False)") def get_test_admin(self, *args, **kwargs): return User.objects.get(username="admin") @@ -64,7 +66,7 @@ def import_scan(self, payload, expected_http_status_code): def import_scan_with_params(self, filename, scan_type="ZAP Scan", engagement=1, minimum_severity="Low", *, active=True, verified=False, push_to_jira=None, endpoint_to_add=None, tags=None, close_old_findings=False, group_by=None, engagement_name=None, product_name=None, product_type_name=None, auto_create_context=None, expected_http_status_code=201, test_title=None, - scan_date=None, service=None, force_active=True, force_verified=True): + scan_date=None, service=None, force_active=True, force_verified=True, background_import=False): with (Path("unittests/scans") / filename).open(encoding="utf-8") as testfile: payload = { @@ -73,6 +75,7 @@ def import_scan_with_params(self, filename, scan_type="ZAP Scan", engagement=1, "file": testfile, "version": "1.0.1", "close_old_findings": close_old_findings, + "background_import": background_import, } if active is not None: @@ -119,7 +122,7 @@ def import_scan_with_params(self, filename, scan_type="ZAP Scan", engagement=1, return self.import_scan(payload, expected_http_status_code) - def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engagement=10, engagements_per_product=50, products_per_product_type=15, *, include_very_big_scans=False, **kwargs): + def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engagement=10, engagements_per_product=50, products_per_product_type=15, *, include_very_big_scans=False, background_import=False, **kwargs): logger.info("product_name_prefix: %s, tests_per_engagement: %s, engagements_per_product: %s, products_per_product_type: %s", product_name_prefix, tests_per_engagement, engagements_per_product, products_per_product_type) product_type_prefix = "Sample scans " + datetime.now().strftime("%Y-%m-%d %H:%M:%S") product_type_index = 1 @@ -172,6 +175,7 @@ def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engageme filename=module_name + "/" + scan_file.name, scan_type=parser.get_scan_types()[0], engagement=eng.id, + background_import=background_import, ) # logger.debug(f"Result of import: {result}") # raise Exception(f"Scan {scan_file.name} is not expected to be imported, but it was.") @@ -191,6 +195,7 @@ def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engageme logger.error("Error importing scan %s: %s", scan, message) def handle(self, *args, **options): + settings.SECURE_SSL_REDIRECT = False logger.info("EXPERIMENTAL: This command may be changed/deprecated/removed without prior notice.") for i in range(options.get("number_of_runs", 1)): product_name_prefix = options.get("product_name_prefix") @@ -203,4 +208,5 @@ def handle(self, *args, **options): engagements_per_product=options.get("engagements_per_product"), products_per_product_type=options.get("products_per_product_type"), include_very_big_scans=options.get("include_very_big_scans"), + background_import=options.get("background_import"), ) diff --git a/dojo/models.py b/dojo/models.py index 153177bc20e..8cff7092ef6 100644 --- a/dojo/models.py +++ b/dojo/models.py @@ -1799,14 +1799,11 @@ def __hash__(self): def __eq__(self, other): if isinstance(other, Endpoint): - # Check if the contents of the endpoint match contents_match = str(self) == str(other) - # Determine if products should be used in the equation - if self.product is not None and other.product is not None: - # Check if the products are the same - products_match = (self.product) == other.product - # Check if the contents match - return products_match and contents_match + # Use product_id (cached integer) instead of self.product to avoid + # triggering a FK lookup on every comparison inside NestedObjects.add_edge. + if self.product_id is not None and other.product_id is not None: + return self.product_id == other.product_id and contents_match return contents_match return NotImplemented diff --git a/dojo/utils_cascade_delete.py b/dojo/utils_cascade_delete.py index 044c45a539e..992e072411c 100644 --- a/dojo/utils_cascade_delete.py +++ b/dojo/utils_cascade_delete.py @@ -9,6 +9,7 @@ """ import logging +from collections import Counter from django.db import OperationalError, models, transaction from django.db.models.sql.compiler import SQLDeleteCompiler @@ -60,7 +61,7 @@ def execute_update_sql(query, **updatespec): return execute_compiled_sql(*get_update_sql(query, **updatespec)) -def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations=None, skip_m2m_for=None, base_model=None, level=0): +def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations=None, skip_m2m_for=None, base_model=None, level=0, *, preview_only=False, counter=None, preview_models=None): """ Recursively walk Django model relations and execute compiled SQL to perform cascade DELETE / SET_NULL on related objects without the Collector. @@ -80,9 +81,15 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations by the caller (avoids redundant tag count queries). base_model: Root model class (set automatically on first call). level: Recursion depth (for logging only). + preview_only: When True, count instead of delete (dry-run mode). + counter: Counter accumulator for preview_only mode. Updated in place. + preview_models: Optional set of model __name__ strings. When set, only + COUNT models in this set during preview_only; still recurse + through all models to reach tracked descendants. Returns: Number of records deleted at this level (0 at level 0 since root is not deleted). + In preview_only mode always returns 0; counts are accumulated in ``counter``. """ if skip_relations is None: @@ -91,6 +98,8 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations skip_m2m_for = set() if base_model is None: base_model = from_model + if preview_only and counter is None: + counter = Counter() instance_pk_query = instance_pk_query.values_list("pk").order_by() @@ -118,27 +127,52 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations filterspec = {f"{fk_column}__in": models.Subquery(instance_pk_query)} if on_delete_name == "SET_NULL": - count = execute_update_sql( - related_model.objects.filter(**filterspec), - **{fk_column: None}, - ) - logger.debug( - "cascade_delete: SET NULL on %d %s records", - count, related_model.__name__, - ) + if not preview_only: + count = execute_update_sql( + related_model.objects.filter(**filterspec), + **{fk_column: None}, + ) + logger.debug( + "cascade_delete: SET NULL on %d %s records", + count, related_model.__name__, + ) + # In preview_only mode SET_NULL means objects survive — nothing to count. elif on_delete_name == "CASCADE": related_pk_query = related_model.objects.filter(**filterspec).values_list( related_model._meta.pk.name, ) - # Recurse into children first (bottom-up deletion) - cascade_delete_related_objects( - related_model, related_pk_query, - skip_relations=skip_relations, - skip_m2m_for=skip_m2m_for, - base_model=base_model, - level=level + 1, - ) + if preview_only: + # Count related objects at this level before recursing into their children. + # Skip COUNT when preview_models is set and this model is not in it. + if preview_models is None or related_model.__name__ in preview_models: + n = related_model.objects.filter(**filterspec).count() + if n: + counter[related_model.__name__] += n + logger.debug( + "cascade_delete preview: counted %d %s records", + n, related_model.__name__, + ) + # Recurse to count grandchildren even when n==0 (subquery may still match). + cascade_delete_related_objects( + related_model, related_pk_query, + skip_relations=skip_relations, + skip_m2m_for=skip_m2m_for, + base_model=base_model, + level=level + 1, + preview_only=True, + counter=counter, + preview_models=preview_models, + ) + else: + # Recurse into children first (bottom-up deletion) + cascade_delete_related_objects( + related_model, related_pk_query, + skip_relations=skip_relations, + skip_m2m_for=skip_m2m_for, + base_model=base_model, + level=level + 1, + ) elif on_delete_name == "DO_NOTHING": logger.debug( @@ -152,39 +186,40 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations on_delete_name, from_model.__name__, related_model.__name__, ) - # Clear M2M through tables before deleting (not discovered by _meta.related_objects). - # Skip if the caller already handled M2M cleanup for this model (e.g. bulk_clear_finding_m2m). - if from_model not in skip_m2m_for: - from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import - - bulk_remove_all_tags(from_model, instance_pk_query) - - # Clear all M2M through tables — both forward (from_model._meta.many_to_many) - # and reverse (other models with ManyToManyField pointing to from_model). - # Forward M2M fields use field.remote_field.through, reverse use field.through. - if from_model not in skip_m2m_for: - m2m_through_models = set() - for field_info in from_model._meta.get_fields(): - if hasattr(field_info, "tag_options"): - continue - through = getattr(field_info, "through", None) or getattr(getattr(field_info, "remote_field", None), "through", None) - if through is not None: - m2m_through_models.add(through) - - for through_model in m2m_through_models: - fk_column = None - for field in through_model._meta.get_fields(): - if hasattr(field, "related_model") and field.related_model is from_model: - fk_column = field.column - break - if fk_column: - filterspec_m2m = {f"{fk_column}__in": models.Subquery(instance_pk_query)} - m2m_count = execute_delete_sql(through_model.objects.filter(**filterspec_m2m)) - if m2m_count: - logger.debug( - "cascade_delete: cleared %d rows from M2M %s", - m2m_count, through_model._meta.db_table, - ) + if not preview_only: + # Clear M2M through tables before deleting (not discovered by _meta.related_objects). + # Skip if the caller already handled M2M cleanup for this model (e.g. bulk_clear_finding_m2m). + if from_model not in skip_m2m_for: + from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import + + bulk_remove_all_tags(from_model, instance_pk_query) + + # Clear all M2M through tables — both forward (from_model._meta.many_to_many) + # and reverse (other models with ManyToManyField pointing to from_model). + # Forward M2M fields use field.remote_field.through, reverse use field.through. + if from_model not in skip_m2m_for: + m2m_through_models = set() + for field_info in from_model._meta.get_fields(): + if hasattr(field_info, "tag_options"): + continue + through = getattr(field_info, "through", None) or getattr(getattr(field_info, "remote_field", None), "through", None) + if through is not None: + m2m_through_models.add(through) + + for through_model in m2m_through_models: + fk_column = None + for field in through_model._meta.get_fields(): + if hasattr(field, "related_model") and field.related_model is from_model: + fk_column = field.column + break + if fk_column: + filterspec_m2m = {f"{fk_column}__in": models.Subquery(instance_pk_query)} + m2m_count = execute_delete_sql(through_model.objects.filter(**filterspec_m2m)) + if m2m_count: + logger.debug( + "cascade_delete: cleared %d rows from M2M %s", + m2m_count, through_model._meta.db_table, + ) # At level 0, do NOT delete root records — the caller handles that # (e.g. via ORM obj.delete() to fire Django signals). @@ -195,6 +230,9 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations ) return 0 + if preview_only: + return 0 + # At deeper levels, delete records after their children are gone filterspec = {f"{from_model._meta.pk.name}__in": models.Subquery(instance_pk_query)} del_query = from_model.objects.filter(**filterspec) diff --git a/unittests/test_cascade_delete.py b/unittests/test_cascade_delete.py new file mode 100644 index 00000000000..f24667e44f8 --- /dev/null +++ b/unittests/test_cascade_delete.py @@ -0,0 +1,179 @@ +""" +Unit tests for cascade_delete_related_objects() in dojo.utils_cascade_delete. + +Focused on preview mode and the preview_models filter parameter. +""" + +import logging +from collections import Counter + +from django.utils import timezone + +from dojo.models import ( + Engagement, + Finding, + Product, + Product_Type, + Test, + Test_Type, + User, + UserContactInfo, +) +from dojo.utils_cascade_delete import cascade_delete_related_objects + +from .dojo_test_case import DojoTestCase + +logger = logging.getLogger(__name__) + + +class TestCascadeDeletePreviewModels(DojoTestCase): + + """Tests for cascade_delete_related_objects(preview_only=True, preview_models=...).""" + + def setUp(self): + super().setUp() + self.testuser = User.objects.create( + username="cascade_preview_test_user", + is_staff=True, + is_superuser=True, + ) + UserContactInfo.objects.create(user=self.testuser, block_execution=True) + self.system_settings(enable_deduplication=False) + self.system_settings(enable_product_grade=False) + + self.product_type = Product_Type.objects.create(name="Cascade Preview PT") + self.product = Product.objects.create( + name="Cascade Preview Product", + description="Test", + prod_type=self.product_type, + ) + self.test_type = Test_Type.objects.get_or_create(name="Manual Test")[0] + self.engagement = Engagement.objects.create( + name="Cascade Preview Engagement", + product=self.product, + target_start=timezone.now(), + target_end=timezone.now(), + ) + self.test = Test.objects.create( + engagement=self.engagement, + test_type=self.test_type, + target_start=timezone.now(), + target_end=timezone.now(), + ) + + def _create_finding(self, title="Finding"): + return Finding.objects.create( + test=self.test, + title=title, + severity="High", + description="Test", + mitigation="Test", + impact="Test", + reporter=self.testuser, + ) + + def test_preview_counts_cascade_relations(self): + """preview_only=True accumulates counts into counter without deleting.""" + self._create_finding("F1") + self._create_finding("F2") + + counter = Counter() + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=self.engagement.pk), + preview_only=True, + counter=counter, + ) + + self.assertGreater(counter["Test"], 0) + self.assertGreater(counter["Finding"], 0) + # Nothing deleted + self.assertTrue(Engagement.objects.filter(pk=self.engagement.pk).exists()) + self.assertTrue(Finding.objects.filter(test=self.test).count() == 2) + + def test_preview_models_skips_count_for_untracked(self): + """With preview_models set, untracked models are not counted.""" + self._create_finding("F1") + + tracked = {"Test", "Finding"} + counter = Counter() + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=self.engagement.pk), + preview_only=True, + counter=counter, + preview_models=tracked, + ) + + for model_name in counter: + self.assertIn(model_name, tracked, msg=f"{model_name} should not be counted") + + def test_preview_models_still_counts_tracked(self): + """With preview_models set, tracked models ARE counted.""" + self._create_finding("F1") + self._create_finding("F2") + + counter = Counter() + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=self.engagement.pk), + preview_only=True, + counter=counter, + preview_models={"Test", "Finding"}, + ) + + self.assertEqual(counter["Test"], 1) + self.assertEqual(counter["Finding"], 2) + + def test_preview_none_preview_models_counts_all(self): + """preview_models=None (default) counts every CASCADE relation.""" + self._create_finding("F1") + + counter_full = Counter() + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=self.engagement.pk), + preview_only=True, + counter=counter_full, + preview_models=None, + ) + + counter_filtered = Counter() + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=self.engagement.pk), + preview_only=True, + counter=counter_filtered, + preview_models={"Test", "Finding"}, + ) + + # Full walk has at least as many distinct model types as filtered + self.assertGreaterEqual(len(counter_full), len(counter_filtered)) + + def test_preview_does_not_delete(self): + """preview_only=True with preview_models never deletes any rows.""" + f = self._create_finding("F1") + + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=self.engagement.pk), + preview_only=True, + counter=Counter(), + preview_models={"Test", "Finding"}, + ) + + self.assertTrue(Finding.objects.filter(pk=f.pk).exists()) + self.assertTrue(Test.objects.filter(pk=self.test.pk).exists()) + self.assertTrue(Engagement.objects.filter(pk=self.engagement.pk).exists()) + + def test_preview_empty_scope_returns_empty_counter(self): + """No matching records → empty counter.""" + counter = Counter() + cascade_delete_related_objects( + Engagement, + Engagement.objects.filter(pk=999999), + preview_only=True, + counter=counter, + preview_models={"Test", "Finding"}, + ) + self.assertEqual(sum(counter.values()), 0) diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index b2fd4c64fb1..78d612dfdec 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -488,3 +488,67 @@ def test_delete_product_with_reverse_m2m_relations(self): # but its accepted_findings M2M entries should be gone self.assertTrue(Risk_Acceptance.objects.filter(id=ra_id).exists()) self.assertEqual(Risk_Acceptance.objects.get(id=ra_id).accepted_findings.count(), 0) + + +@override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=False) +class TestPrepareDuplicatesForDeletePreview(TestPrepareDuplicatesForDelete): + + """Tests for prepare_duplicates_for_delete(preview_only=True) — no data modified.""" + + def test_preview_returns_zero_no_outside_duplicates(self): + """No outside-scope duplicates → count is 0.""" + self._create_finding(self.test1, "F1") + count = prepare_duplicates_for_delete(self.test1, preview_only=True) + self.assertEqual(count, 0) + + def test_preview_returns_zero_cascade_delete_false(self): + """DUPLICATE_CLUSTER_CASCADE_DELETE=False → outside dupes survive, count=0.""" + original = self._create_finding(self.test1, "Original") + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(outside_dupe, original) + + count = prepare_duplicates_for_delete(self.test1, preview_only=True) + self.assertEqual(count, 0) + + @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) + def test_preview_counts_outside_scope_duplicates(self): + """DUPLICATE_CLUSTER_CASCADE_DELETE=True → outside dupe counted.""" + original = self._create_finding(self.test1, "Original") + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(outside_dupe, original) + + count = prepare_duplicates_for_delete(self.test1, preview_only=True) + self.assertEqual(count, 1) + + @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) + def test_preview_counts_multiple_outside_duplicates(self): + """Multiple outside-scope duplicates across originals are all counted.""" + original_a = self._create_finding(self.test1, "Original A") + original_b = self._create_finding(self.test1, "Original B") + dupe_a = self._create_finding(self.test2, "Dupe of A") + dupe_b = self._create_finding(self.test2, "Dupe of B") + dupe_b2 = self._create_finding(self.test3, "Dupe of B (2)") + self._make_duplicate(dupe_a, original_a) + self._make_duplicate(dupe_b, original_b) + self._make_duplicate(dupe_b2, original_b) + + count = prepare_duplicates_for_delete(self.test1, preview_only=True) + self.assertEqual(count, 3) + + @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) + def test_preview_does_not_modify_data(self): + """preview_only=True must not change any Finding rows.""" + original = self._create_finding(self.test1, "Original") + outside_dupe = self._create_finding(self.test2, "Outside Dupe") + self._make_duplicate(outside_dupe, original) + + prepare_duplicates_for_delete(self.test1, preview_only=True) + + outside_dupe.refresh_from_db() + self.assertTrue(outside_dupe.duplicate) + self.assertEqual(outside_dupe.duplicate_finding_id, original.id) + + def test_preview_returns_zero_for_unsupported_type(self): + """Unsupported object type → 0, no warning logged (preview_only is silent).""" + count = prepare_duplicates_for_delete(object(), preview_only=True) + self.assertEqual(count, 0)