1from collections import Counter, defaultdict
2from functools import partial, reduce
3from itertools import chain
4from operator import attrgetter, or_
5
6from django.db import IntegrityError, connections, models, transaction
7from django.db.models import query_utils, signals, sql
8
9
class ProtectedError(IntegrityError):
    """
    Integrity error carrying the objects that block a deletion through a
    protected foreign key.

    The blocking objects are stored on ``protected_objects`` and are also
    forwarded to the base exception as its second positional argument.
    """

    def __init__(self, msg, protected_objects):
        super().__init__(msg, protected_objects)
        self.protected_objects = protected_objects
14
15
class RestrictedError(IntegrityError):
    """
    Integrity error carrying the objects that block a deletion through a
    restricted foreign key.

    The blocking objects are stored on ``restricted_objects`` and are also
    forwarded to the base exception as its second positional argument.
    """

    def __init__(self, msg, restricted_objects):
        super().__init__(msg, restricted_objects)
        self.restricted_objects = restricted_objects
20
21
def CASCADE(collector, field, sub_objs, using):
    """
    on_delete handler: hand the referencing objects to the collector so they
    are deleted as well.

    Restricted-object checking is deferred (``fail_on_restricted=False``) to
    the top-level collect() call. When the field is nullable and the
    connection cannot defer constraint checks, a field update to None is
    additionally scheduled for the referencing objects.
    """
    is_nullable = field.null
    collector.collect(
        sub_objs,
        source=field.remote_field.model,
        source_attr=field.name,
        nullable=is_nullable,
        fail_on_restricted=False,
    )
    if not field.null:
        return
    # Only consult the connection for nullable fields.
    if connections[using].features.can_defer_constraint_checks:
        return
    collector.add_field_update(field, None, sub_objs)
32
33
def PROTECT(collector, field, sub_objs, using):
    """
    on_delete handler: refuse the deletion by raising ProtectedError.

    The message names the referenced model and the '<Model>.<field>' of the
    referencing objects; ``sub_objs`` is attached to the error as the
    protected objects.
    """
    referenced_name = field.remote_field.model.__name__
    referencing_name = sub_objs[0].__class__.__name__
    message = (
        "Cannot delete some instances of model '%s' because they are "
        "referenced through a protected foreign key: '%s.%s'"
        % (
            referenced_name,
            referencing_name,
            field.name,
        )
    )
    raise ProtectedError(message, sub_objs)
45
46
def RESTRICT(collector, field, sub_objs, using):
    """
    on_delete handler: register the referencing objects as restricted and
    record a deletion-order dependency between the two models.

    No error is raised here; the collector decides later what to do with the
    restricted objects it has accumulated.
    """
    collector.add_restricted_objects(field, sub_objs)
    referenced_model = field.remote_field.model
    referencing_model = field.model
    collector.add_dependency(referenced_model, referencing_model)
50
51
def SET(value):
    """
    Build an on_delete handler that schedules a field update to ``value``.

    If ``value`` is callable it is invoked each time the handler runs and its
    result is used; otherwise ``value`` itself is used and the handler is
    flagged with ``lazy_sub_objs = True``. The returned handler also carries
    a ``deconstruct()`` attribute returning the
    ``("django.db.models.SET", (value,), {})`` triple.
    """
    value_is_callable = callable(value)

    def set_on_delete(collector, field, sub_objs, using):
        new_value = value() if value_is_callable else value
        collector.add_field_update(field, new_value, sub_objs)

    if not value_is_callable:
        # Only the constant-value variant is marked lazy.
        set_on_delete.lazy_sub_objs = True

    set_on_delete.deconstruct = lambda: ("django.db.models.SET", (value,), {})
    return set_on_delete
67
68
def SET_NULL(collector, field, sub_objs, using):
    """
    on_delete handler: schedule an update of ``field`` to None on the
    referencing objects.
    """
    null_value = None
    collector.add_field_update(field, null_value, sub_objs)


# Flag read by the collector via getattr(on_delete, "lazy_sub_objs", False).
setattr(SET_NULL, "lazy_sub_objs", True)
74
75
def SET_DEFAULT(collector, field, sub_objs, using):
    """
    on_delete handler: schedule an update of ``field`` to the value returned
    by ``field.get_default()`` on the referencing objects.
    """
    default_value = field.get_default()
    collector.add_field_update(field, default_value, sub_objs)
78
79
def DO_NOTHING(collector, field, sub_objs, using):
    """on_delete handler that intentionally takes no action."""
    return None
82
83
def get_candidate_relations_to_delete(opts):
    """
    Return a generator over the relations of ``opts`` that may require work
    on deletion.

    Candidates come from N-1 and 1-1 relations: auto-created, non-concrete
    fields flagged one_to_one or one_to_many. N-N (many-to-many) relations
    aren't candidates for deletion.
    """

    def is_candidate(relation):
        if not relation.auto_created or relation.concrete:
            return False
        return relation.one_to_one or relation.one_to_many

    # Hidden fields are included in the lookup.
    all_fields = opts.get_fields(include_hidden=True)
    return (relation for relation in all_fields if is_candidate(relation))
92
93
class Collector:
    """
    Gather model instances (and querysets) that must be deleted or updated
    together, then perform the deletion.

    Typical use is ``collect()`` followed by ``delete()``. While collecting,
    each reverse relation's on_delete handler is invoked and may add objects,
    schedule field updates, register restricted objects, or raise.
    """

    def __init__(self, using, origin=None):
        """
        'using' is the database alias used for every query issued by this
        collector. 'origin' is passed through to the pre_delete/post_delete
        signals.
        """
        self.using = using
        # A Model or QuerySet object.
        self.origin = origin
        # Initially, {model: {instances}}, later values become lists.
        self.data = defaultdict(set)
        # {(field, value): [instances, …]}
        self.field_updates = defaultdict(list)
        # {model: {field: {instances}}}
        self.restricted_objects = defaultdict(partial(defaultdict, set))
        # fast_deletes is a list of queryset-likes that can be deleted without
        # fetching the objects into memory.
        self.fast_deletes = []

        # Tracks deletion-order dependency for databases without transactions
        # or ability to defer constraint checks. Only concrete model classes
        # should be included, as the dependencies exist only between actual
        # database tables; proxy models are represented here by their concrete
        # parent.
        self.dependencies = defaultdict(set)  # {model: {models}}

    def add(self, objs, source=None, nullable=False, reverse_dependency=False):
        """
        Add 'objs' to the collection of objects to be deleted. If the call is
        the result of a cascade, 'source' should be the model that caused it,
        and 'nullable' should be set to True if the relation can be null.

        Return a list of all objects that were not already collected.
        """
        if not objs:
            return []
        new_objs = []
        model = objs[0].__class__
        instances = self.data[model]
        for obj in objs:
            if obj not in instances:
                new_objs.append(obj)
        instances.update(new_objs)
        # Nullable relationships can be ignored -- they are nulled out before
        # deleting, and therefore do not affect the order in which objects have
        # to be deleted.
        if source is not None and not nullable:
            self.add_dependency(source, model, reverse_dependency=reverse_dependency)
        return new_objs

    def add_dependency(self, model, dependency, reverse_dependency=False):
        """
        Record that 'model' depends on 'dependency' for deletion ordering
        (see sort()). Both are stored by their concrete model. If
        'reverse_dependency' is True the two roles are swapped first.
        """
        if reverse_dependency:
            model, dependency = dependency, model
        self.dependencies[model._meta.concrete_model].add(
            dependency._meta.concrete_model
        )
        # Make sure the dependency has an entry in self.data so that sort()
        # takes it into account.
        self.data.setdefault(dependency, self.data.default_factory())

    def add_field_update(self, field, value, objs):
        """
        Schedule a field update. 'objs' must be a homogeneous iterable
        collection of model instances (e.g. a QuerySet).
        """
        self.field_updates[field, value].append(objs)

    def add_restricted_objects(self, field, objs):
        """
        Register 'objs' as restricted through 'field', keyed by the class of
        the first object. Do nothing if 'objs' is empty.
        """
        if objs:
            model = objs[0].__class__
            self.restricted_objects[model][field].update(objs)

    def clear_restricted_objects_from_set(self, model, objs):
        """
        Remove 'objs' (any iterable of instances) from every restricted-object
        set registered for 'model'. Do nothing if 'model' has no entry.
        """
        if model in self.restricted_objects:
            # Rebuild as a defaultdict(set) rather than a plain dict so that a
            # later add_restricted_objects() call for a field not seen yet
            # on this model doesn't fail with KeyError.
            remaining = defaultdict(set)
            for field, items in self.restricted_objects[model].items():
                remaining[field] = items.difference(objs)
            self.restricted_objects[model] = remaining

    def clear_restricted_objects_from_queryset(self, model, qs):
        """
        Remove from the restricted objects of 'model' those that are matched
        by 'qs', determined by filtering 'qs' on the restricted objects'
        primary keys. Do nothing if 'model' has no entry.
        """
        if model in self.restricted_objects:
            restricted_pks = [
                obj.pk
                for restricted in self.restricted_objects[model].values()
                for obj in restricted
            ]
            objs = set(qs.filter(pk__in=restricted_pks))
            self.clear_restricted_objects_from_set(model, objs)

    def _has_signal_listeners(self, model):
        """
        Return True if pre_delete or post_delete has listeners for 'model'.
        """
        return signals.pre_delete.has_listeners(
            model
        ) or signals.post_delete.has_listeners(model)

    def can_fast_delete(self, objs, from_field=None):
        """
        Determine if the objects in the given queryset-like or single object
        can be fast-deleted. This can be done if there are no cascades, no
        parents and no signal listeners for the object class.

        The 'from_field' tells where we are coming from - we need this to
        determine if the objects are in fact to be deleted. Allow also
        skipping parent -> child -> parent chain preventing fast delete of
        the child.
        """
        if from_field and from_field.remote_field.on_delete is not CASCADE:
            return False
        if hasattr(objs, "_meta"):
            model = objs._meta.model
        elif hasattr(objs, "model") and hasattr(objs, "_raw_delete"):
            model = objs.model
        else:
            return False
        if self._has_signal_listeners(model):
            return False
        # The use of from_field comes from the need to avoid cascade back to
        # parent when parent delete is cascading to child.
        opts = model._meta
        return (
            all(
                link == from_field
                for link in opts.concrete_model._meta.parents.values()
            )
            and
            # Foreign keys pointing to this model.
            all(
                related.field.remote_field.on_delete is DO_NOTHING
                for related in get_candidate_relations_to_delete(opts)
            )
            and (
                # Something like generic foreign key.
                not any(
                    hasattr(field, "bulk_related_objects")
                    for field in opts.private_fields
                )
            )
        )

    def get_del_batches(self, objs, fields):
        """
        Return the objs in suitably sized batches for the used connection.
        """
        field_names = [field.name for field in fields]
        # Never use a batch size smaller than 1.
        conn_batch_size = max(
            connections[self.using].ops.bulk_batch_size(field_names, objs), 1
        )
        if len(objs) > conn_batch_size:
            return [
                objs[i : i + conn_batch_size]
                for i in range(0, len(objs), conn_batch_size)
            ]
        else:
            return [objs]

    def collect(
        self,
        objs,
        source=None,
        nullable=False,
        collect_related=True,
        source_attr=None,
        reverse_dependency=False,
        keep_parents=False,
        fail_on_restricted=True,
    ):
        """
        Add 'objs' to the collection of objects to be deleted as well as all
        parent instances. 'objs' must be a homogeneous iterable collection of
        model instances (e.g. a QuerySet). If 'collect_related' is True,
        related objects will be handled by their respective on_delete handler.

        If the call is the result of a cascade, 'source' should be the model
        that caused it and 'nullable' should be set to True, if the relation
        can be null.

        If 'reverse_dependency' is True, 'source' will be deleted before the
        current model, rather than after. (Needed for cascading to parent
        models, the one case in which the cascade follows the forwards
        direction of an FK rather than the reverse direction.)

        If 'keep_parents' is True, data of parent model's will be not deleted.

        If 'fail_on_restricted' is False, error won't be raised even if it's
        prohibited to delete such objects due to RESTRICT, that defers
        restricted object checking in recursive calls where the top-level call
        may need to collect more objects to determine whether restricted ones
        can be deleted.

        Raise ProtectedError if any handler raised it, and RestrictedError if
        'fail_on_restricted' is True and restricted objects remain that are
        not themselves collected for deletion.
        """
        if self.can_fast_delete(objs):
            self.fast_deletes.append(objs)
            return
        new_objs = self.add(
            objs, source, nullable, reverse_dependency=reverse_dependency
        )
        if not new_objs:
            return

        model = new_objs[0].__class__

        if not keep_parents:
            # Recursively collect concrete model's parent models, but not their
            # related objects. These will be found by meta.get_fields()
            concrete_model = model._meta.concrete_model
            for ptr in concrete_model._meta.parents.values():
                if ptr:
                    parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
                    self.collect(
                        parent_objs,
                        source=model,
                        source_attr=ptr.remote_field.related_name,
                        collect_related=False,
                        reverse_dependency=True,
                        fail_on_restricted=False,
                    )
        if not collect_related:
            return

        model_fast_deletes = defaultdict(list)
        protected_objects = defaultdict(list)
        for related in get_candidate_relations_to_delete(model._meta):
            # Preserve parent reverse relationships if keep_parents=True.
            if keep_parents and related.model in model._meta.all_parents:
                continue
            field = related.field
            on_delete = field.remote_field.on_delete
            # Identity check, consistent with can_fast_delete().
            if on_delete is DO_NOTHING:
                continue
            related_model = related.related_model
            if self.can_fast_delete(related_model, from_field=field):
                model_fast_deletes[related_model].append(field)
                continue
            batches = self.get_del_batches(new_objs, [field])
            for batch in batches:
                sub_objs = self.related_objects(related_model, [field], batch)
                # Non-referenced fields can be deferred if no signal receivers
                # are connected for the related model as they'll never be
                # exposed to the user. Skip field deferring when some
                # relationships are select_related as interactions between both
                # features are hard to get right. This should only happen in
                # the rare cases where .related_objects is overridden anyway.
                if not (
                    sub_objs.query.select_related
                    or self._has_signal_listeners(related_model)
                ):
                    referenced_fields = set(
                        chain.from_iterable(
                            (rf.attname for rf in rel.field.foreign_related_fields)
                            for rel in get_candidate_relations_to_delete(
                                related_model._meta
                            )
                        )
                    )
                    sub_objs = sub_objs.only(*tuple(referenced_fields))
                # Handlers flagged lazy_sub_objs are called without first
                # evaluating sub_objs for truthiness.
                if getattr(on_delete, "lazy_sub_objs", False) or sub_objs:
                    try:
                        on_delete(self, field, sub_objs, self.using)
                    except ProtectedError as error:
                        # Accumulate so that a single error can report every
                        # protected relation at once.
                        key = "'%s.%s'" % (field.model.__name__, field.name)
                        protected_objects[key] += error.protected_objects
        if protected_objects:
            raise ProtectedError(
                "Cannot delete some instances of model %r because they are "
                "referenced through protected foreign keys: %s."
                % (
                    model.__name__,
                    ", ".join(protected_objects),
                ),
                set(chain.from_iterable(protected_objects.values())),
            )
        for related_model, related_fields in model_fast_deletes.items():
            batches = self.get_del_batches(new_objs, related_fields)
            for batch in batches:
                sub_objs = self.related_objects(related_model, related_fields, batch)
                self.fast_deletes.append(sub_objs)
        for field in model._meta.private_fields:
            if hasattr(field, "bulk_related_objects"):
                # It's something like generic foreign key.
                sub_objs = field.bulk_related_objects(new_objs, self.using)
                self.collect(
                    sub_objs, source=model, nullable=True, fail_on_restricted=False
                )

        if fail_on_restricted:
            # Raise an error if collected restricted objects (RESTRICT) aren't
            # candidates for deletion also collected via CASCADE.
            for related_model, instances in self.data.items():
                self.clear_restricted_objects_from_set(related_model, instances)
            for qs in self.fast_deletes:
                self.clear_restricted_objects_from_queryset(qs.model, qs)
            if self.restricted_objects:
                restricted_objects = defaultdict(list)
                for related_model, fields in self.restricted_objects.items():
                    for field, objs in fields.items():
                        if objs:
                            key = "'%s.%s'" % (related_model.__name__, field.name)
                            restricted_objects[key] += objs
                if restricted_objects:
                    raise RestrictedError(
                        "Cannot delete some instances of model %r because "
                        "they are referenced through restricted foreign keys: "
                        "%s."
                        % (
                            model.__name__,
                            ", ".join(restricted_objects),
                        ),
                        set(chain.from_iterable(restricted_objects.values())),
                    )

    def related_objects(self, related_model, related_fields, objs):
        """
        Get a QuerySet of the related model to objs via related fields.
        """
        # OR together one "<field>__in" condition per related field.
        predicate = query_utils.Q.create(
            [(f"{related_field.name}__in", objs) for related_field in related_fields],
            connector=query_utils.Q.OR,
        )
        return related_model._base_manager.using(self.using).filter(predicate)

    def instances_with_model(self):
        """Yield a (model, instance) pair for every collected instance."""
        for model, instances in self.data.items():
            for obj in instances:
                yield model, obj

    def sort(self):
        """
        Reorder self.data so that every model comes after the models recorded
        as its dependencies. If no such order exists (no progress can be made
        in a pass), leave self.data untouched.
        """
        sorted_models = []
        concrete_models = set()
        models = list(self.data)
        while len(sorted_models) < len(models):
            found = False
            for model in models:
                if model in sorted_models:
                    continue
                dependencies = self.dependencies.get(model._meta.concrete_model)
                # Ready once all of its dependencies have been placed.
                if not (dependencies and dependencies.difference(concrete_models)):
                    sorted_models.append(model)
                    concrete_models.add(model._meta.concrete_model)
                    found = True
            if not found:
                return
        self.data = {model: self.data[model] for model in sorted_models}

    def delete(self):
        """
        Delete everything that was collected.

        Return a 2-tuple: the total number of deleted rows and a
        {model label: count} dict. The primary key attribute of every
        collected instance is set to None afterwards.
        """
        # sort instance collections
        for model, instances in self.data.items():
            self.data[model] = sorted(instances, key=attrgetter("pk"))

        # if possible, bring the models in an order suitable for databases that
        # don't support transactions or cannot defer constraint checks until the
        # end of a transaction.
        self.sort()
        # number of objects deleted for each model label
        deleted_counter = Counter()

        # Optimize for the case with a single obj and no dependencies
        if len(self.data) == 1:
            # Unpack explicitly instead of relying on the variables leaked by
            # the loop above.
            ((model, instances),) = self.data.items()
            if len(instances) == 1:
                instance = instances[0]
                if self.can_fast_delete(instance):
                    with transaction.mark_for_rollback_on_error(self.using):
                        count = sql.DeleteQuery(model).delete_batch(
                            [instance.pk], self.using
                        )
                    setattr(instance, model._meta.pk.attname, None)
                    return count, {model._meta.label: count}

        with transaction.atomic(using=self.using, savepoint=False):
            # send pre_delete signals
            for model, obj in self.instances_with_model():
                if not model._meta.auto_created:
                    signals.pre_delete.send(
                        sender=model,
                        instance=obj,
                        using=self.using,
                        origin=self.origin,
                    )

            # fast deletes
            for qs in self.fast_deletes:
                count = qs._raw_delete(using=self.using)
                if count:
                    deleted_counter[qs.model._meta.label] += count

            # update fields
            for (field, value), instances_list in self.field_updates.items():
                updates = []
                objs = []
                for instances in instances_list:
                    # Unevaluated querysets are updated directly; anything
                    # else is updated by primary key.
                    if (
                        isinstance(instances, models.QuerySet)
                        and instances._result_cache is None
                    ):
                        updates.append(instances)
                    else:
                        objs.extend(instances)
                if updates:
                    combined_updates = reduce(or_, updates)
                    combined_updates.update(**{field.name: value})
                if objs:
                    model = objs[0].__class__
                    query = sql.UpdateQuery(model)
                    query.update_batch(
                        list({obj.pk for obj in objs}), {field.name: value}, self.using
                    )

            # reverse instance collections
            for instances in self.data.values():
                instances.reverse()

            # delete instances
            for model, instances in self.data.items():
                query = sql.DeleteQuery(model)
                pk_list = [obj.pk for obj in instances]
                count = query.delete_batch(pk_list, self.using)
                if count:
                    deleted_counter[model._meta.label] += count

                if not model._meta.auto_created:
                    for obj in instances:
                        signals.post_delete.send(
                            sender=model,
                            instance=obj,
                            using=self.using,
                            origin=self.origin,
                        )

        for model, instances in self.data.items():
            for instance in instances:
                setattr(instance, model._meta.pk.attname, None)
        return sum(deleted_counter.values()), dict(deleted_counter)