Skip to content
29 changes: 24 additions & 5 deletions dojo/finding/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,20 +611,32 @@ def reconfigure_duplicate_cluster(original, cluster_outside):
cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original)


def prepare_duplicates_for_delete(obj):
def prepare_duplicates_for_delete(obj, *, preview_only=False):
"""
Prepare duplicate clusters before deleting a Test, Engagement, Product, or Product_Type.

Resets inside-scope duplicate FKs and reconfigures outside-scope clusters
so that cascade_delete won't hit FK violations on the self-referential
duplicate_finding field.

When preview_only=True, no data is modified and the function returns the count of
outside-scope findings that would be deleted (non-zero only when
DUPLICATE_CLUSTER_CASCADE_DELETE=True). Otherwise it returns None.
"""
from dojo.utils import FINDING_SCOPE_FILTERS # noqa: PLC0415 circular import

scope_field = FINDING_SCOPE_FILTERS.get(type(obj))
if scope_field is None:
logger.warning("prepare_duplicates_for_delete: unsupported object type %s", type(obj).__name__)
return
if not preview_only:
logger.warning("prepare_duplicates_for_delete: unsupported object type %s", type(obj).__name__)
return 0 if preview_only else None

if preview_only:
if not settings.DUPLICATE_CLUSTER_CASCADE_DELETE:
return 0
scope_ids_subquery = Finding.objects.filter(**{scope_field: obj}).values_list("id", flat=True)
return Finding.objects.filter(
duplicate_finding_id__in=scope_ids_subquery,
).exclude(id__in=scope_ids_subquery).count()

logger.debug("prepare_duplicates_for_delete: %s %d", type(obj).__name__, obj.id)

Expand All @@ -637,7 +649,7 @@ def prepare_duplicates_for_delete(obj):

if not scope_ids_subquery.exists():
logger.debug("no findings in scope, nothing to prepare")
return
return None

# Bulk-reset inside-scope duplicates: single UPDATE instead of per-original mass_model_updater.
# Clears the duplicate_finding FK so cascade_delete won't trip over dangling self-references.
Expand Down Expand Up @@ -694,6 +706,8 @@ def prepare_duplicates_for_delete(obj):
outside_orphan_count,
)

return None


@receiver(pre_delete, sender=Test)
def test_pre_delete(sender, instance, **kwargs):
Expand Down Expand Up @@ -830,13 +844,15 @@ def _bulk_delete_findings_internal(finding_qs, chunk_size=1000, *, order_desc=Fa
)


def bulk_delete_findings(finding_qs, chunk_size=1000, cascade_root=None, *, order_desc=False):
def bulk_delete_findings(finding_qs, chunk_size=1000, cascade_root=None, *, order_desc=False, preview_only=False):
"""
Entry point; may delegate to Pro via settings.BULK_DELETE_FINDINGS_METHOD.

cascade_root: optional dict describing the top-level object whose cascade triggered
this bulk delete (e.g. {"model": "dojo.engagement", "pk": 9}). Ignored by OSS
when no custom method is configured.

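preview_only: when True, nothing is deleted. The flag is forwarded to the configured
custom method, which is expected to return a ``{product_id: finding_count}`` dict;
when no custom method is configured this function returns None.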
"""
from dojo.utils import get_custom_method # noqa: PLC0415 circular import

Expand All @@ -846,7 +862,10 @@ def bulk_delete_findings(finding_qs, chunk_size=1000, cascade_root=None, *, orde
chunk_size=chunk_size,
cascade_root=cascade_root,
order_desc=order_desc,
preview_only=preview_only,
)
if preview_only:
return None
return _bulk_delete_findings_internal(finding_qs, chunk_size=chunk_size, order_desc=order_desc)


Expand Down
10 changes: 8 additions & 2 deletions dojo/management/commands/import_all_unittest_scans.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from inspect import isclass
from pathlib import Path

from django.conf import settings
from django.core.management.base import BaseCommand
from django.urls import reverse
from django.utils import timezone
Expand Down Expand Up @@ -43,6 +44,7 @@ def add_arguments(self, parser):
parser.add_argument("--engagements-per-product", type=int, default=50, help="Number of engagements per product before a new product is created, defaults to 50")
parser.add_argument("--products-per-product-type", type=int, default=15, help="Number of products per product type before a new product type is created, defaults to 15")
parser.add_argument("--number-of-runs", type=int, default=1, help="Number of times to run the import of all sample scans, defaults to 1")
parser.add_argument("--background-import", action="store_true", default=False, help="Use async/background imports (Pro feature, default: False)")

def get_test_admin(self, *args, **kwargs):
    """Return the User whose username is "admin"; any extra arguments are ignored."""
    return User.objects.get(username="admin")
Expand All @@ -64,7 +66,7 @@ def import_scan(self, payload, expected_http_status_code):
def import_scan_with_params(self, filename, scan_type="ZAP Scan", engagement=1, minimum_severity="Low", *, active=True, verified=False,
push_to_jira=None, endpoint_to_add=None, tags=None, close_old_findings=False, group_by=None, engagement_name=None,
product_name=None, product_type_name=None, auto_create_context=None, expected_http_status_code=201, test_title=None,
scan_date=None, service=None, force_active=True, force_verified=True):
scan_date=None, service=None, force_active=True, force_verified=True, background_import=False):

with (Path("unittests/scans") / filename).open(encoding="utf-8") as testfile:
payload = {
Expand All @@ -73,6 +75,7 @@ def import_scan_with_params(self, filename, scan_type="ZAP Scan", engagement=1,
"file": testfile,
"version": "1.0.1",
"close_old_findings": close_old_findings,
"background_import": background_import,
}

if active is not None:
Expand Down Expand Up @@ -119,7 +122,7 @@ def import_scan_with_params(self, filename, scan_type="ZAP Scan", engagement=1,

return self.import_scan(payload, expected_http_status_code)

def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engagement=10, engagements_per_product=50, products_per_product_type=15, *, include_very_big_scans=False, **kwargs):
def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engagement=10, engagements_per_product=50, products_per_product_type=15, *, include_very_big_scans=False, background_import=False, **kwargs):
logger.info("product_name_prefix: %s, tests_per_engagement: %s, engagements_per_product: %s, products_per_product_type: %s", product_name_prefix, tests_per_engagement, engagements_per_product, products_per_product_type)
product_type_prefix = "Sample scans " + datetime.now().strftime("%Y-%m-%d %H:%M:%S")
product_type_index = 1
Expand Down Expand Up @@ -172,6 +175,7 @@ def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engageme
filename=module_name + "/" + scan_file.name,
scan_type=parser.get_scan_types()[0],
engagement=eng.id,
background_import=background_import,
)
# logger.debug(f"Result of import: {result}")
# raise Exception(f"Scan {scan_file.name} is not expected to be imported, but it was.")
Expand All @@ -191,6 +195,7 @@ def import_all_unittest_scans(self, product_name_prefix=None, tests_per_engageme
logger.error("Error importing scan %s: %s", scan, message)

def handle(self, *args, **options):
settings.SECURE_SSL_REDIRECT = False
logger.info("EXPERIMENTAL: This command may be changed/deprecated/removed without prior notice.")
for i in range(options.get("number_of_runs", 1)):
product_name_prefix = options.get("product_name_prefix")
Expand All @@ -203,4 +208,5 @@ def handle(self, *args, **options):
engagements_per_product=options.get("engagements_per_product"),
products_per_product_type=options.get("products_per_product_type"),
include_very_big_scans=options.get("include_very_big_scans"),
background_import=options.get("background_import"),
)
11 changes: 4 additions & 7 deletions dojo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1799,14 +1799,11 @@ def __hash__(self):

def __eq__(self, other):
    """
    Compare two endpoints by their string form and, when both have one, by product.

    Returns:
        True/False for Endpoint operands; NotImplemented for any other type so
        Python can fall back to the reflected comparison.

    """
    if not isinstance(other, Endpoint):
        return NotImplemented

    # Check if the contents of the endpoint match
    contents_match = str(self) == str(other)
    # Use product_id (cached integer) instead of self.product to avoid
    # triggering a FK lookup on every comparison inside NestedObjects.add_edge.
    # Products only take part in the comparison when both endpoints have one.
    if self.product_id is not None and other.product_id is not None:
        return self.product_id == other.product_id and contents_match
    return contents_match
Expand Down
138 changes: 88 additions & 50 deletions dojo/utils_cascade_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import logging
from collections import Counter

from django.db import OperationalError, models, transaction
from django.db.models.sql.compiler import SQLDeleteCompiler
Expand Down Expand Up @@ -60,7 +61,7 @@ def execute_update_sql(query, **updatespec):
return execute_compiled_sql(*get_update_sql(query, **updatespec))


def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations=None, skip_m2m_for=None, base_model=None, level=0):
def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations=None, skip_m2m_for=None, base_model=None, level=0, *, preview_only=False, counter=None, preview_models=None):
"""
Recursively walk Django model relations and execute compiled SQL
to perform cascade DELETE / SET_NULL on related objects without the Collector.
Expand All @@ -80,9 +81,15 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations
by the caller (avoids redundant tag count queries).
base_model: Root model class (set automatically on first call).
level: Recursion depth (for logging only).
preview_only: When True, count instead of delete (dry-run mode).
counter: Counter accumulator for preview_only mode. Updated in place.
preview_models: Optional set of model __name__ strings. When set, only
COUNT models in this set during preview_only; still recurse
through all models to reach tracked descendants.

Returns:
Number of records deleted at this level (0 at level 0 since root is not deleted).
In preview_only mode always returns 0; counts are accumulated in ``counter``.

"""
if skip_relations is None:
Expand All @@ -91,6 +98,8 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations
skip_m2m_for = set()
if base_model is None:
base_model = from_model
if preview_only and counter is None:
counter = Counter()

instance_pk_query = instance_pk_query.values_list("pk").order_by()

Expand Down Expand Up @@ -118,27 +127,52 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations
filterspec = {f"{fk_column}__in": models.Subquery(instance_pk_query)}

if on_delete_name == "SET_NULL":
count = execute_update_sql(
related_model.objects.filter(**filterspec),
**{fk_column: None},
)
logger.debug(
"cascade_delete: SET NULL on %d %s records",
count, related_model.__name__,
)
if not preview_only:
count = execute_update_sql(
related_model.objects.filter(**filterspec),
**{fk_column: None},
)
logger.debug(
"cascade_delete: SET NULL on %d %s records",
count, related_model.__name__,
)
# In preview_only mode SET_NULL means objects survive — nothing to count.

elif on_delete_name == "CASCADE":
related_pk_query = related_model.objects.filter(**filterspec).values_list(
related_model._meta.pk.name,
)
# Recurse into children first (bottom-up deletion)
cascade_delete_related_objects(
related_model, related_pk_query,
skip_relations=skip_relations,
skip_m2m_for=skip_m2m_for,
base_model=base_model,
level=level + 1,
)
if preview_only:
# Count related objects at this level before recursing into their children.
# Skip COUNT when preview_models is set and this model is not in it.
if preview_models is None or related_model.__name__ in preview_models:
n = related_model.objects.filter(**filterspec).count()
if n:
counter[related_model.__name__] += n
logger.debug(
"cascade_delete preview: counted %d %s records",
n, related_model.__name__,
)
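# Always recurse: when preview_models excludes this model no count was taken here,
# yet tracked descendants must still be reached. (If a count was taken and is 0, the
# subquery is empty and every deeper count will be 0 as well.)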
cascade_delete_related_objects(
related_model, related_pk_query,
skip_relations=skip_relations,
skip_m2m_for=skip_m2m_for,
base_model=base_model,
level=level + 1,
preview_only=True,
counter=counter,
preview_models=preview_models,
)
else:
# Recurse into children first (bottom-up deletion)
cascade_delete_related_objects(
related_model, related_pk_query,
skip_relations=skip_relations,
skip_m2m_for=skip_m2m_for,
base_model=base_model,
level=level + 1,
)

elif on_delete_name == "DO_NOTHING":
logger.debug(
Expand All @@ -152,39 +186,40 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations
on_delete_name, from_model.__name__, related_model.__name__,
)

# Clear M2M through tables before deleting (not discovered by _meta.related_objects).
# Skip if the caller already handled M2M cleanup for this model (e.g. bulk_clear_finding_m2m).
if from_model not in skip_m2m_for:
from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import

bulk_remove_all_tags(from_model, instance_pk_query)

# Clear all M2M through tables — both forward (from_model._meta.many_to_many)
# and reverse (other models with ManyToManyField pointing to from_model).
# Forward M2M fields use field.remote_field.through, reverse use field.through.
if from_model not in skip_m2m_for:
m2m_through_models = set()
for field_info in from_model._meta.get_fields():
if hasattr(field_info, "tag_options"):
continue
through = getattr(field_info, "through", None) or getattr(getattr(field_info, "remote_field", None), "through", None)
if through is not None:
m2m_through_models.add(through)

for through_model in m2m_through_models:
fk_column = None
for field in through_model._meta.get_fields():
if hasattr(field, "related_model") and field.related_model is from_model:
fk_column = field.column
break
if fk_column:
filterspec_m2m = {f"{fk_column}__in": models.Subquery(instance_pk_query)}
m2m_count = execute_delete_sql(through_model.objects.filter(**filterspec_m2m))
if m2m_count:
logger.debug(
"cascade_delete: cleared %d rows from M2M %s",
m2m_count, through_model._meta.db_table,
)
if not preview_only:
# Clear M2M through tables before deleting (not discovered by _meta.related_objects).
# Skip if the caller already handled M2M cleanup for this model (e.g. bulk_clear_finding_m2m).
if from_model not in skip_m2m_for:
from dojo.tag_utils import bulk_remove_all_tags # noqa: PLC0415 circular import

bulk_remove_all_tags(from_model, instance_pk_query)

# Clear all M2M through tables — both forward (from_model._meta.many_to_many)
# and reverse (other models with ManyToManyField pointing to from_model).
# Forward M2M fields use field.remote_field.through, reverse use field.through.
if from_model not in skip_m2m_for:
m2m_through_models = set()
for field_info in from_model._meta.get_fields():
if hasattr(field_info, "tag_options"):
continue
through = getattr(field_info, "through", None) or getattr(getattr(field_info, "remote_field", None), "through", None)
if through is not None:
m2m_through_models.add(through)

for through_model in m2m_through_models:
fk_column = None
for field in through_model._meta.get_fields():
if hasattr(field, "related_model") and field.related_model is from_model:
fk_column = field.column
break
if fk_column:
filterspec_m2m = {f"{fk_column}__in": models.Subquery(instance_pk_query)}
m2m_count = execute_delete_sql(through_model.objects.filter(**filterspec_m2m))
if m2m_count:
logger.debug(
"cascade_delete: cleared %d rows from M2M %s",
m2m_count, through_model._meta.db_table,
)

# At level 0, do NOT delete root records — the caller handles that
# (e.g. via ORM obj.delete() to fire Django signals).
Expand All @@ -195,6 +230,9 @@ def cascade_delete_related_objects(from_model, instance_pk_query, skip_relations
)
return 0

if preview_only:
return 0

# At deeper levels, delete records after their children are gone
filterspec = {f"{from_model._meta.pk.name}__in": models.Subquery(instance_pk_query)}
del_query = from_model.objects.filter(**filterspec)
Expand Down
Loading
Loading