1"""
2Create SQL statements for QuerySets.
3
4The code in here encapsulates all of the SQL construction so that QuerySets
5themselves do not have to (and could be backed by things other than SQL
6databases). The abstraction barrier only works one way: this module has to know
7all about the internals of models in order to get the information it needs.
8"""
9
10import copy
11import difflib
12import functools
13import sys
14from collections import Counter, namedtuple
15from collections.abc import Iterator, Mapping
16from itertools import chain, count, product
17from string import ascii_uppercase
18
19from django.core.exceptions import FieldDoesNotExist, FieldError
20from django.db import DEFAULT_DB_ALIAS, NotSupportedError, connections
21from django.db.models.aggregates import Count
22from django.db.models.constants import LOOKUP_SEP
23from django.db.models.expressions import (
24 BaseExpression,
25 Col,
26 ColPairs,
27 Exists,
28 F,
29 OuterRef,
30 RawSQL,
31 Ref,
32 ResolvedOuterRef,
33 Value,
34)
35from django.db.models.fields import Field
36from django.db.models.lookups import Lookup
37from django.db.models.query_utils import (
38 Q,
39 check_rel_lookup_compatibility,
40 refs_expression,
41)
42from django.db.models.sql.constants import INNER, LOUTER, ORDER_DIR, SINGLE
43from django.db.models.sql.datastructures import BaseTable, Empty, Join, MultiJoin
44from django.db.models.sql.where import AND, OR, ExtraWhere, NothingNode, WhereNode
45from django.utils.functional import cached_property
46from django.utils.regex_helper import _lazy_re_compile
47from django.utils.tree import Node
48
49__all__ = ["Query", "RawQuery"]
50
51# Quotation marks ('"`[]), whitespace characters, semicolons, or inline
52# SQL comments are forbidden in column aliases.
53FORBIDDEN_ALIAS_PATTERN = _lazy_re_compile(r"['`\"\]\[;\s]|--|/\*|\*/")
54
55# Inspired from
56# https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
57EXPLAIN_OPTIONS_PATTERN = _lazy_re_compile(r"[\w-]+")
58
59
def get_field_names_from_opts(opts):
    """
    Return the set of field names and attnames reachable from *opts*.

    Concrete fields contribute both their ``name`` and ``attname``;
    non-concrete fields (e.g. reverse relations) contribute only their
    ``name``. Return an empty set when *opts* is None.
    """
    if opts is None:
        return set()
    names = set()
    for field in opts.get_fields():
        names.add(field.name)
        if field.concrete:
            names.add(field.attname)
    return names
68
69
def get_paths_from_expression(expr):
    """
    Yield the lookup paths referenced by *expr*.

    A bare F() yields its name directly. Flattenable expressions are
    walked, and every F or Q object found inside contributes its paths.
    """
    if isinstance(expr, F):
        yield expr.name
        return
    if not hasattr(expr, "flatten"):
        return
    for node in expr.flatten():
        if isinstance(node, F):
            yield node.name
        elif isinstance(node, Q):
            yield from get_children_from_q(node)
79
80
def get_children_from_q(q):
    """
    Recursively yield the lookup paths referenced by a Q object tree.

    Tuple children yield their lookup (lhs) and any paths referenced by
    a resolvable rhs value; nested nodes are walked recursively.
    """
    for child in q.children:
        if isinstance(child, Node):
            yield from get_children_from_q(child)
            continue
        if isinstance(child, tuple):
            lhs, rhs = child
            yield lhs
            if hasattr(rhs, "resolve_expression"):
                yield from get_paths_from_expression(rhs)
        elif hasattr(child, "resolve_expression"):
            yield from get_paths_from_expression(child)
92
93
def get_child_with_renamed_prefix(prefix, replacement, child):
    """
    Return a copy of *child* whose lookup paths starting with *prefix*
    are repointed at *replacement*.

    Handles Q nodes, (lookup, value) tuples, F references, and arbitrary
    resolvable expressions. Passing a QuerySet raises ValueError because
    its OuterRef() references cannot be repointed safely.
    """
    from django.db.models.query import QuerySet

    prefixed = prefix + LOOKUP_SEP
    if isinstance(child, Node):
        return rename_prefix_from_q(prefix, replacement, child)
    if isinstance(child, tuple):
        lhs, rhs = child
        if lhs.startswith(prefixed):
            lhs = lhs.replace(prefix, replacement, 1)
        if hasattr(rhs, "resolve_expression") and not isinstance(rhs, F):
            rhs = get_child_with_renamed_prefix(prefix, replacement, rhs)
        return lhs, rhs

    if isinstance(child, F):
        child = child.copy()
        if child.name.startswith(prefixed):
            child.name = child.name.replace(prefix, replacement, 1)
    elif isinstance(child, QuerySet):
        # QuerySet may contain OuterRef() references which cannot work properly
        # without repointing to the filtered annotation and will spawn a
        # different JOIN. Always raise ValueError instead of providing partial
        # support in other cases.
        raise ValueError(
            "Passing a QuerySet within a FilteredRelation is not supported."
        )
    elif hasattr(child, "resolve_expression"):
        child = child.copy()
        renamed = [
            get_child_with_renamed_prefix(prefix, replacement, expr)
            for expr in child.get_source_expressions()
        ]
        child.set_source_expressions(renamed)
    return child
128
129
def rename_prefix_from_q(prefix, replacement, q):
    """Return a copy of Q object *q* with *prefix* renamed to *replacement*."""
    renamed_children = [
        get_child_with_renamed_prefix(prefix, replacement, child)
        for child in q.children
    ]
    return Q.create(renamed_children, q.connector, q.negated)
136
137
# Result of resolving a lookup path into joins: the final field reached,
# its target fields, the Options of the last traversed model, the join
# aliases used, the traversed path parts, and a callable transforming
# fetched values.
JoinInfo = namedtuple(
    "JoinInfo",
    ("final_field", "targets", "opts", "joins", "path", "transform_function"),
)
142
143
class RawQuery:
    """A single raw SQL query."""

    def __init__(self, sql, using, params=()):
        self.sql = sql
        self.using = using
        self.params = params
        self.cursor = None

        # Mirror some attributes of a regular Query so that the compiler
        # machinery can be used to process the results.
        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
        self.extra_select = {}
        self.annotation_select = {}

    def chain(self, using):
        """Alias of clone(); provided for Query API compatibility."""
        return self.clone(using)

    def clone(self, using):
        """Return a fresh RawQuery with the same SQL/params bound to *using*."""
        return RawQuery(self.sql, using, params=self.params)

    def get_columns(self):
        """Return the (converted) column names of the executed query."""
        if self.cursor is None:
            self._execute_query()
        convert = connections[self.using].introspection.identifier_converter
        return [convert(description[0]) for description in self.cursor.description]

    def __iter__(self):
        # Always execute a new query for a new iterator. This could be
        # optimized with a cache at the expense of RAM.
        self._execute_query()
        if connections[self.using].features.can_use_chunked_reads:
            rows = self.cursor
        else:
            # The database can't stream results, so evaluate the entire
            # query up front before handing out the iterator.
            rows = list(self.cursor)
        return iter(rows)

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self)

    @property
    def params_type(self):
        """Return dict, tuple, or None describing how params are passed."""
        if self.params is None:
            return None
        return dict if isinstance(self.params, Mapping) else tuple

    def __str__(self):
        if self.params_type is None:
            return self.sql
        return self.sql % self.params_type(self.params)

    def _execute_query(self):
        connection = connections[self.using]

        # Adapt parameters to the database, as much as possible considering
        # that the target type isn't known. See #17755.
        adapter = connection.ops.adapt_unknown_value
        params_type = self.params_type
        if params_type is dict:
            params = {key: adapter(value) for key, value in self.params.items()}
        elif params_type is tuple:
            params = tuple(adapter(value) for value in self.params)
        elif params_type is None:
            params = None
        else:
            raise RuntimeError("Unexpected params type: %s" % params_type)

        self.cursor = connection.cursor()
        self.cursor.execute(self.sql, params)
215
216
217ExplainInfo = namedtuple("ExplainInfo", ("format", "options"))
218
219
class Query(BaseExpression):
    """A single SQL query."""

    # Prefix used to generate table aliases (T1, T2, ...).
    alias_prefix = "T"
    empty_result_set_value = None
    # Alias prefixes already used by (outer) queries; consulted when bumping
    # the prefix to avoid collisions in subqueries.
    subq_aliases = frozenset([alias_prefix])

    # Name of the compiler class (resolved via connection.ops.compiler()).
    compiler = "SQLCompiler"

    base_table_class = BaseTable
    join_class = Join

    default_cols = True
    default_ordering = True
    standard_ordering = True

    filter_is_sticky = False
    subquery = False

    # SQL-related attributes.
    # Select and related select clauses are expressions to use in the SELECT
    # clause of the query. The select is used for cases where we want to set up
    # the select clause to contain other than default fields (values(),
    # subqueries...). Note that annotations go to annotations dictionary.
    select = ()
    # The group_by attribute can have one of the following forms:
    # - None: no group by at all in the query
    # - A tuple of expressions: group by (at least) those expressions.
    # String refs are also allowed for now.
    # - True: group by all select fields of the model
    # See compiler.get_group_by() for details.
    group_by = None
    order_by = ()
    low_mark = 0  # Used for offset/limit.
    high_mark = None  # Used for offset/limit.
    distinct = False
    distinct_fields = ()
    select_for_update = False
    select_for_update_nowait = False
    select_for_update_skip_locked = False
    select_for_update_of = ()
    select_for_no_key_update = False
    select_related = False
    # Arbitrary limit for select_related to prevent infinite recursion.
    max_depth = 5
    # Holds the selects defined by a call to values() or values_list()
    # excluding annotation_select and extra_select.
    values_select = ()
    selected = None

    # SQL annotation-related attributes.
    annotation_select_mask = None
    _annotation_select_cache = None

    # Set combination attributes (union/intersection/difference).
    combinator = None
    combinator_all = False
    combined_queries = ()

    # These are for extensions. The contents are more or less appended verbatim
    # to the appropriate clause.
    extra_select_mask = None
    _extra_select_cache = None

    extra_tables = ()
    extra_order_by = ()

    # A tuple that is a set of model field names and either True, if these are
    # the fields to defer, or False if these are the only fields to load.
    deferred_loading = (frozenset(), True)

    explain_info = None

    def __init__(self, model, alias_cols=True):
        """Initialize per-instance mutable state for a query on *model*."""
        self.model = model
        self.alias_refcount = {}
        # alias_map is the most important data structure regarding joins.
        # It's used for recording which joins exist in the query and what
        # types they are. The key is the alias of the joined table (possibly
        # the table name) and the value is a Join-like object (see
        # sql.datastructures.Join for more information).
        self.alias_map = {}
        # Whether to provide alias to columns during reference resolving.
        self.alias_cols = alias_cols
        # Sometimes the query contains references to aliases in outer queries (as
        # a result of split_exclude). Correct alias quoting needs to know these
        # aliases too.
        # Map external tables to whether they are aliased.
        self.external_aliases = {}
        self.table_map = {}  # Maps table names to list of aliases.
        self.used_aliases = set()

        self.where = WhereNode()
        # Maps alias -> Annotation Expression.
        self.annotations = {}
        # These are for extensions. The contents are more or less appended
        # verbatim to the appropriate clause.
        self.extra = {}  # Maps col_alias -> (col_sql, params).

        self._filtered_relations = {}
320
321 @property
322 def output_field(self):
323 if len(self.select) == 1:
324 select = self.select[0]
325 return getattr(select, "target", None) or select.field
326 elif len(self.annotation_select) == 1:
327 return next(iter(self.annotation_select.values())).output_field
328
329 @cached_property
330 def base_table(self):
331 for alias in self.alias_map:
332 return alias
333
334 def __str__(self):
335 """
336 Return the query as a string of SQL with the parameter values
337 substituted in (use sql_with_params() to see the unsubstituted string).
338
339 Parameter values won't necessarily be quoted correctly, since that is
340 done by the database interface at execution time.
341 """
342 sql, params = self.sql_with_params()
343 return sql % params
344
345 def sql_with_params(self):
346 """
347 Return the query as an SQL string and the parameters that will be
348 substituted into the query.
349 """
350 return self.get_compiler(DEFAULT_DB_ALIAS).as_sql()
351
352 def __deepcopy__(self, memo):
353 """Limit the amount of work when a Query is deepcopied."""
354 result = self.clone()
355 memo[id(self)] = result
356 return result
357
358 def get_compiler(self, using=None, connection=None, elide_empty=True):
359 if using is None and connection is None:
360 raise ValueError("Need either using or connection")
361 if using:
362 connection = connections[using]
363 return connection.ops.compiler(self.compiler)(
364 self, connection, using, elide_empty
365 )
366
367 def get_meta(self):
368 """
369 Return the Options instance (the model._meta) from which to start
370 processing. Normally, this is self.model._meta, but it can be changed
371 by subclasses.
372 """
373 if self.model:
374 return self.model._meta
375
    def clone(self):
        """
        Return a copy of the current Query. A lightweight alternative to
        deepcopy().

        Attributes shared between clones (immutable or copy-on-write) keep
        shallow references; mutable containers that the clone may modify
        independently are copied explicitly below.
        """
        obj = Empty()
        obj.__class__ = self.__class__
        # Copy references to everything.
        obj.__dict__ = self.__dict__.copy()
        # Clone attributes that can't use shallow copy.
        obj.alias_refcount = self.alias_refcount.copy()
        obj.alias_map = self.alias_map.copy()
        obj.external_aliases = self.external_aliases.copy()
        obj.table_map = self.table_map.copy()
        obj.where = self.where.clone()
        obj.annotations = self.annotations.copy()
        if self.annotation_select_mask is not None:
            obj.annotation_select_mask = self.annotation_select_mask.copy()
        if self.combined_queries:
            obj.combined_queries = tuple(
                [query.clone() for query in self.combined_queries]
            )
        # _annotation_select_cache cannot be copied, as doing so breaks the
        # (necessary) state in which both annotations and
        # _annotation_select_cache point to the same underlying objects.
        # It will get re-populated in the cloned queryset the next time it's
        # used.
        obj._annotation_select_cache = None
        obj.extra = self.extra.copy()
        if self.extra_select_mask is not None:
            obj.extra_select_mask = self.extra_select_mask.copy()
        if self._extra_select_cache is not None:
            obj._extra_select_cache = self._extra_select_cache.copy()
        if self.select_related is not False:
            # Use deepcopy because select_related stores fields in nested
            # dicts.
            obj.select_related = copy.deepcopy(obj.select_related)
        if "subq_aliases" in self.__dict__:
            obj.subq_aliases = self.subq_aliases.copy()
        obj.used_aliases = self.used_aliases.copy()
        obj._filtered_relations = self._filtered_relations.copy()
        # Clear the cached_property, if it exists, so the clone recomputes
        # its own base_table on next access.
        obj.__dict__.pop("base_table", None)
        return obj
420
421 def chain(self, klass=None):
422 """
423 Return a copy of the current Query that's ready for another operation.
424 The klass argument changes the type of the Query, e.g. UpdateQuery.
425 """
426 obj = self.clone()
427 if klass and obj.__class__ != klass:
428 obj.__class__ = klass
429 if not obj.filter_is_sticky:
430 obj.used_aliases = set()
431 obj.filter_is_sticky = False
432 if hasattr(obj, "_setup_query"):
433 obj._setup_query()
434 return obj
435
436 def relabeled_clone(self, change_map):
437 clone = self.clone()
438 clone.change_aliases(change_map)
439 return clone
440
441 def _get_col(self, target, field, alias):
442 if not self.alias_cols:
443 alias = None
444 return target.get_col(alias, field)
445
    def get_aggregation(self, using, aggregate_exprs):
        """
        Return the dictionary with the values of the existing aggregations.

        *aggregate_exprs* maps aliases to unresolved aggregate expressions;
        the result maps the same aliases to their computed values. Depending
        on the current query's state (grouping, slicing, distinct, set
        operations, ...), the aggregation is either applied directly or
        wrapped around the current query as a subquery.
        """
        if not aggregate_exprs:
            return {}
        # Store annotation mask prior to temporarily adding aggregations for
        # resolving purpose to facilitate their subsequent removal.
        refs_subquery = False
        refs_window = False
        replacements = {}
        annotation_select_mask = self.annotation_select_mask
        for alias, aggregate_expr in aggregate_exprs.items():
            self.check_alias(alias)
            aggregate = aggregate_expr.resolve_expression(
                self, allow_joins=True, reuse=None, summarize=True
            )
            if not aggregate.contains_aggregate:
                raise TypeError("%s is not an aggregate expression" % alias)
            # Temporarily add aggregate to annotations to allow remaining
            # members of `aggregates` to resolve against each others.
            self.append_annotation_mask([alias])
            aggregate_refs = aggregate.get_refs()
            refs_subquery |= any(
                getattr(self.annotations[ref], "contains_subquery", False)
                for ref in aggregate_refs
            )
            refs_window |= any(
                getattr(self.annotations[ref], "contains_over_clause", True)
                for ref in aggregate_refs
            )
            aggregate = aggregate.replace_expressions(replacements)
            self.annotations[alias] = aggregate
            replacements[Ref(alias, aggregate)] = aggregate
        # Stash resolved aggregates now that they have been allowed to resolve
        # against each other.
        aggregates = {alias: self.annotations.pop(alias) for alias in aggregate_exprs}
        self.set_annotation_mask(annotation_select_mask)
        # Existing usage of aggregation can be determined by the presence of
        # selected aggregates but also by filters against aliased aggregates.
        _, having, qualify = self.where.split_having_qualify()
        has_existing_aggregation = (
            any(
                getattr(annotation, "contains_aggregate", True)
                for annotation in self.annotations.values()
            )
            or having
        )
        # Annotations that return multiple rows (e.g. set-returning
        # functions) cannot be elided from the subquery's select mask.
        set_returning_annotations = {
            alias
            for alias, annotation in self.annotation_select.items()
            if getattr(annotation, "set_returning", False)
        }
        # Decide if we need to use a subquery.
        #
        # Existing aggregations would cause incorrect results as
        # get_aggregation() must produce just one result and thus must not use
        # GROUP BY.
        #
        # If the query has limit or distinct, or uses set operations, then
        # those operations must be done in a subquery so that the query
        # aggregates on the limit and/or distinct results instead of applying
        # the distinct and limit after the aggregation.
        if (
            isinstance(self.group_by, tuple)
            or self.is_sliced
            or has_existing_aggregation
            or refs_subquery
            or refs_window
            or qualify
            or self.distinct
            or self.combinator
            or set_returning_annotations
        ):
            from django.db.models.sql.subqueries import AggregateQuery

            inner_query = self.clone()
            inner_query.subquery = True
            outer_query = AggregateQuery(self.model, inner_query)
            inner_query.select_for_update = False
            inner_query.select_related = False
            inner_query.set_annotation_mask(self.annotation_select)
            # Queries with distinct_fields need ordering and when a limit is
            # applied we must take the slice from the ordered query. Otherwise
            # no need for ordering.
            inner_query.clear_ordering(force=False)
            if not inner_query.distinct:
                # If the inner query uses default select and it has some
                # aggregate annotations, then we must make sure the inner
                # query is grouped by the main model's primary key. However,
                # clearing the select clause can alter results if distinct is
                # used.
                if inner_query.default_cols and has_existing_aggregation:
                    inner_query.group_by = (
                        self.model._meta.pk.get_col(inner_query.get_initial_alias()),
                    )
                inner_query.default_cols = False
                if not qualify and not self.combinator:
                    # Mask existing annotations that are not referenced by
                    # aggregates to be pushed to the outer query unless
                    # filtering against window functions or if the query is
                    # combined as both would require complex realiasing logic.
                    annotation_mask = set()
                    if isinstance(self.group_by, tuple):
                        for expr in self.group_by:
                            annotation_mask |= expr.get_refs()
                    for aggregate in aggregates.values():
                        annotation_mask |= aggregate.get_refs()
                    # Avoid eliding expressions that might have an incidence on
                    # the implicit grouping logic.
                    for annotation_alias, annotation in self.annotation_select.items():
                        if annotation.get_group_by_cols():
                            annotation_mask.add(annotation_alias)
                    inner_query.set_annotation_mask(annotation_mask)
                    # Annotations that possibly return multiple rows cannot
                    # be masked as they might have an incidence on the query.
                    annotation_mask |= set_returning_annotations

            # Add aggregates to the outer AggregateQuery. This requires making
            # sure all columns referenced by the aggregates are selected in the
            # inner query. It is achieved by retrieving all column references
            # by the aggregates, explicitly selecting them in the inner query,
            # and making sure the aggregates are repointed to them.
            col_refs = {}
            for alias, aggregate in aggregates.items():
                replacements = {}
                for col in self._gen_cols([aggregate], resolve_refs=False):
                    if not (col_ref := col_refs.get(col)):
                        index = len(col_refs) + 1
                        col_alias = f"__col{index}"
                        col_ref = Ref(col_alias, col)
                        col_refs[col] = col_ref
                        inner_query.add_annotation(col, col_alias)
                    replacements[col] = col_ref
                outer_query.annotations[alias] = aggregate.replace_expressions(
                    replacements
                )
            if (
                inner_query.select == ()
                and not inner_query.default_cols
                and not inner_query.annotation_select_mask
            ):
                # In case of Model.objects[0:3].count(), there would be no
                # field selected in the inner query, yet we must use a subquery.
                # So, make sure at least one field is selected.
                inner_query.select = (
                    self.model._meta.pk.get_col(inner_query.get_initial_alias()),
                )
        else:
            outer_query = self
            self.select = ()
            self.selected = None
            self.default_cols = False
            self.extra = {}
            if self.annotations:
                # Inline reference to existing annotations and mask them as
                # they are unnecessary given only the summarized aggregations
                # are requested.
                replacements = {
                    Ref(alias, annotation): annotation
                    for alias, annotation in self.annotations.items()
                }
                self.annotations = {
                    alias: aggregate.replace_expressions(replacements)
                    for alias, aggregate in aggregates.items()
                }
            else:
                self.annotations = aggregates
            self.set_annotation_mask(aggregates)

        # NotImplemented as an empty_result_set_value means the expression
        # cannot short-circuit an empty result set, so elision is disabled.
        empty_set_result = [
            expression.empty_result_set_value
            for expression in outer_query.annotation_select.values()
        ]
        elide_empty = not any(result is NotImplemented for result in empty_set_result)
        outer_query.clear_ordering(force=True)
        outer_query.clear_limits()
        outer_query.select_for_update = False
        outer_query.select_related = False
        compiler = outer_query.get_compiler(using, elide_empty=elide_empty)
        result = compiler.execute_sql(SINGLE)
        if result is None:
            result = empty_set_result
        else:
            cols = outer_query.annotation_select.values()
            converters = compiler.get_converters(cols)
            rows = compiler.apply_converters((result,), converters)
            if compiler.has_composite_fields(cols):
                rows = compiler.composite_fields_to_tuples(rows, cols)
            result = next(rows)

        return dict(zip(outer_query.annotation_select, result))
638
639 def get_count(self, using):
640 """
641 Perform a COUNT() query using the current filter constraints.
642 """
643 obj = self.clone()
644 return obj.get_aggregation(using, {"__count": Count("*")})["__count"]
645
    def has_filters(self):
        """Return the WHERE node; truthy when any filters are applied."""
        return self.where
648
649 def exists(self, limit=True):
650 q = self.clone()
651 if not (q.distinct and q.is_sliced):
652 if q.group_by is True:
653 q.add_fields(
654 (f.attname for f in self.model._meta.concrete_fields), False
655 )
656 # Disable GROUP BY aliases to avoid orphaning references to the
657 # SELECT clause which is about to be cleared.
658 q.set_group_by(allow_aliases=False)
659 q.clear_select_clause()
660 if q.combined_queries and q.combinator == "union":
661 q.combined_queries = tuple(
662 combined_query.exists(limit=False)
663 for combined_query in q.combined_queries
664 )
665 q.clear_ordering(force=True)
666 if limit is True:
667 q.set_limits(high=1)
668 q.add_annotation(Value(1), "a")
669 return q
670
671 def has_results(self, using):
672 q = self.exists()
673 compiler = q.get_compiler(using=using)
674 return compiler.has_results()
675
676 def explain(self, using, format=None, **options):
677 q = self.clone()
678 for option_name in options:
679 if (
680 not EXPLAIN_OPTIONS_PATTERN.fullmatch(option_name)
681 or "--" in option_name
682 ):
683 raise ValueError(f"Invalid option name: {option_name!r}.")
684 q.explain_info = ExplainInfo(format, options)
685 compiler = q.get_compiler(using=using)
686 return "\n".join(compiler.explain_query())
687
    def combine(self, rhs, connector):
        """
        Merge the 'rhs' query into the current one (with any 'rhs' effects
        being applied *after* (that is, "to the right of") anything in the
        current query. 'rhs' is not modified during a call to this function.

        The 'connector' parameter describes how to connect filters from the
        'rhs' query.
        """
        if self.model != rhs.model:
            raise TypeError("Cannot combine queries on two different base models.")
        if self.is_sliced:
            raise TypeError("Cannot combine queries once a slice has been taken.")
        if self.distinct != rhs.distinct:
            raise TypeError("Cannot combine a unique query with a non-unique query.")
        if self.distinct_fields != rhs.distinct_fields:
            raise TypeError("Cannot combine queries with different distinct fields.")

        # If lhs and rhs shares the same alias prefix, it is possible to have
        # conflicting alias changes like T4 -> T5, T5 -> T6, which might end up
        # as T4 -> T6 while combining two querysets. To prevent this, change an
        # alias prefix of the rhs and update current aliases accordingly,
        # except if the alias is the base table since it must be present in the
        # query on both sides.
        initial_alias = self.get_initial_alias()
        rhs = rhs.clone()
        rhs.bump_prefix(self, exclude={initial_alias})

        # Work out how to relabel the rhs aliases, if necessary.
        change_map = {}
        conjunction = connector == AND

        # Determine which existing joins can be reused. When combining the
        # query with AND we must recreate all joins for m2m filters. When
        # combining with OR we can reuse joins. The reason is that in AND
        # case a single row can't fulfill a condition like:
        #     revrel__col=1 & revrel__col=2
        # But, there might be two different related rows matching this
        # condition. In OR case a single True is enough, so single row is
        # enough, too.
        #
        # Note that we will be creating duplicate joins for non-m2m joins in
        # the AND case. The results will be correct but this creates too many
        # joins. This is something that could be fixed later on.
        reuse = set() if conjunction else set(self.alias_map)
        joinpromoter = JoinPromoter(connector, 2, False)
        joinpromoter.add_votes(
            j for j in self.alias_map if self.alias_map[j].join_type == INNER
        )
        rhs_votes = set()
        # Now, add the joins from rhs query into the new query (skipping base
        # table).
        rhs_tables = list(rhs.alias_map)[1:]
        for alias in rhs_tables:
            join = rhs.alias_map[alias]
            # If the left side of the join was already relabeled, use the
            # updated alias.
            join = join.relabeled_clone(change_map)
            new_alias = self.join(join, reuse=reuse)
            if join.join_type == INNER:
                rhs_votes.add(new_alias)
            # We can't reuse the same join again in the query. If we have two
            # distinct joins for the same connection in rhs query, then the
            # combined query must have two joins, too.
            reuse.discard(new_alias)
            if alias != new_alias:
                change_map[alias] = new_alias
            if not rhs.alias_refcount[alias]:
                # The alias was unused in the rhs query. Unref it so that it
                # will be unused in the new query, too. We have to add and
                # unref the alias so that join promotion has information of
                # the join type for the unused alias.
                self.unref_alias(new_alias)
        joinpromoter.add_votes(rhs_votes)
        joinpromoter.update_join_types(self)

        # Combine subqueries aliases to ensure aliases relabelling properly
        # handle subqueries when combining where and select clauses.
        self.subq_aliases |= rhs.subq_aliases

        # Now relabel a copy of the rhs where-clause and add it to the current
        # one.
        w = rhs.where.clone()
        w.relabel_aliases(change_map)
        self.where.add(w, connector)

        # Selection columns and extra extensions are those provided by 'rhs'.
        if rhs.select:
            self.set_select([col.relabeled_clone(change_map) for col in rhs.select])
        else:
            self.select = ()

        if connector == OR:
            # It would be nice to be able to handle this, but the queries don't
            # really make sense (or return consistent value sets). Not worth
            # the extra complexity when you can write a real query instead.
            if self.extra and rhs.extra:
                raise ValueError(
                    "When merging querysets using 'or', you cannot have "
                    "extra(select=...) on both sides."
                )
        self.extra.update(rhs.extra)
        extra_select_mask = set()
        if self.extra_select_mask is not None:
            extra_select_mask.update(self.extra_select_mask)
        if rhs.extra_select_mask is not None:
            extra_select_mask.update(rhs.extra_select_mask)
        if extra_select_mask:
            self.set_extra_mask(extra_select_mask)
        self.extra_tables += rhs.extra_tables

        # Ordering uses the 'rhs' ordering, unless it has none, in which case
        # the current ordering is used.
        self.order_by = rhs.order_by or self.order_by
        self.extra_order_by = rhs.extra_order_by or self.extra_order_by
803
    def _get_defer_select_mask(self, opts, mask, select_mask=None):
        """
        Build the select mask for .defer() from the nested field *mask*.

        *mask* maps deferred field names to nested masks for related models;
        entries are popped as they are processed. Fields NOT in the mask are
        included in the returned select mask. Recurses into relations whose
        mask is non-empty.
        """
        if select_mask is None:
            select_mask = {}
        # The primary key is always retrieved regardless of deferral.
        select_mask[opts.pk] = {}
        # All concrete fields and related objects that are not part of the
        # defer mask must be included. If a relational field is encountered it
        # gets added to the mask for it be considered if `select_related` and
        # the cycle continues by recursively calling this function.
        for field in opts.concrete_fields + opts.related_objects:
            field_mask = mask.pop(field.name, None)
            field_att_mask = None
            if field_attname := getattr(field, "attname", None):
                field_att_mask = mask.pop(field_attname, None)
            if field_mask is None and field_att_mask is None:
                select_mask.setdefault(field, {})
            elif field_mask:
                if not field.is_relation:
                    raise FieldError(next(iter(field_mask)))
                # Virtual fields such as many-to-many and generic foreign keys
                # cannot be effectively deferred. Historically, they were
                # allowed to be passed to QuerySet.defer(). Ignore such field
                # references until a layer of validation at mask alteration
                # time is eventually implemented.
                if field.many_to_many:
                    continue
                field_select_mask = select_mask.setdefault(field, {})
                related_model = field.related_model._meta.concrete_model
                self._get_defer_select_mask(
                    related_model._meta, field_mask, field_select_mask
                )
        # Remaining defer entries must be references to filtered relations
        # otherwise they are surfaced as missing field errors.
        for field_name, field_mask in mask.items():
            if filtered_relation := self._filtered_relations.get(field_name):
                relation = opts.get_field(filtered_relation.relation_name)
                field_select_mask = select_mask.setdefault((field_name, relation), {})
                related_model = relation.related_model._meta.concrete_model
                self._get_defer_select_mask(
                    related_model._meta, field_mask, field_select_mask
                )
            else:
                # Raises FieldDoesNotExist for unknown names.
                opts.get_field(field_name)
        return select_mask
847
848 def _get_only_select_mask(self, opts, mask, select_mask=None):
849 if select_mask is None:
850 select_mask = {}
851 select_mask[opts.pk] = {}
852 # Only include fields mentioned in the mask.
853 for field_name, field_mask in mask.items():
854 field = opts.get_field(field_name)
855 field_select_mask = select_mask.setdefault(field, {})
856 if field_mask:
857 if not field.is_relation:
858 raise FieldError(next(iter(field_mask)))
859 related_model = field.remote_field.model._meta.concrete_model
860 self._get_only_select_mask(
861 related_model._meta, field_mask, field_select_mask
862 )
863 return select_mask
864
865 def get_select_mask(self):
866 """
867 Convert the self.deferred_loading data structure to an alternate data
868 structure, describing the field that *will* be loaded. This is used to
869 compute the columns to select from the database and also by the
870 QuerySet class to work out which fields are being initialized on each
871 model. Models that have all their fields included aren't mentioned in
872 the result, only those that have field restrictions in place.
873 """
874 field_names, defer = self.deferred_loading
875 if not field_names:
876 return {}
877 mask = {}
878 for field_name in field_names:
879 part_mask = mask
880 for part in field_name.split(LOOKUP_SEP):
881 part_mask = part_mask.setdefault(part, {})
882 opts = self.get_meta()
883 if defer:
884 return self._get_defer_select_mask(opts, mask)
885 return self._get_only_select_mask(opts, mask)
886
887 def table_alias(self, table_name, create=False, filtered_relation=None):
888 """
889 Return a table alias for the given table_name and whether this is a
890 new alias or not.
891
892 If 'create' is true, a new alias is always created. Otherwise, the
893 most recently created alias for the table (if one exists) is reused.
894 """
895 alias_list = self.table_map.get(table_name)
896 if not create and alias_list:
897 alias = alias_list[0]
898 self.alias_refcount[alias] += 1
899 return alias, False
900
901 # Create a new alias for this table.
902 if alias_list:
903 alias = "%s%d" % (self.alias_prefix, len(self.alias_map) + 1)
904 alias_list.append(alias)
905 else:
906 # The first occurrence of a table uses the table name directly.
907 alias = (
908 filtered_relation.alias if filtered_relation is not None else table_name
909 )
910 self.table_map[table_name] = [alias]
911 self.alias_refcount[alias] = 1
912 return alias, True
913
914 def ref_alias(self, alias):
915 """Increases the reference count for this alias."""
916 self.alias_refcount[alias] += 1
917
918 def unref_alias(self, alias, amount=1):
919 """Decreases the reference count for this alias."""
920 self.alias_refcount[alias] -= amount
921
922 def promote_joins(self, aliases):
923 """
924 Promote recursively the join type of given aliases and its children to
925 an outer join. If 'unconditional' is False, only promote the join if
926 it is nullable or the parent join is an outer join.
927
928 The children promotion is done to avoid join chains that contain a LOUTER
929 b INNER c. So, if we have currently a INNER b INNER c and a->b is promoted,
930 then we must also promote b->c automatically, or otherwise the promotion
931 of a->b doesn't actually change anything in the query results.
932 """
933 aliases = list(aliases)
934 while aliases:
935 alias = aliases.pop(0)
936 if self.alias_map[alias].join_type is None:
937 # This is the base table (first FROM entry) - this table
938 # isn't really joined at all in the query, so we should not
939 # alter its join type.
940 continue
941 # Only the first alias (skipped above) should have None join_type
942 assert self.alias_map[alias].join_type is not None
943 parent_alias = self.alias_map[alias].parent_alias
944 parent_louter = (
945 parent_alias and self.alias_map[parent_alias].join_type == LOUTER
946 )
947 already_louter = self.alias_map[alias].join_type == LOUTER
948 if (self.alias_map[alias].nullable or parent_louter) and not already_louter:
949 self.alias_map[alias] = self.alias_map[alias].promote()
950 # Join type of 'alias' changed, so re-examine all aliases that
951 # refer to this one.
952 aliases.extend(
953 join
954 for join in self.alias_map
955 if self.alias_map[join].parent_alias == alias
956 and join not in aliases
957 )
958
959 def demote_joins(self, aliases):
960 """
961 Change join type from LOUTER to INNER for all joins in aliases.
962
963 Similarly to promote_joins(), this method must ensure no join chains
964 containing first an outer, then an inner join are generated. If we
965 are demoting b->c join in chain a LOUTER b LOUTER c then we must
966 demote a->b automatically, or otherwise the demotion of b->c doesn't
967 actually change anything in the query results. .
968 """
969 aliases = list(aliases)
970 while aliases:
971 alias = aliases.pop(0)
972 if self.alias_map[alias].join_type == LOUTER:
973 self.alias_map[alias] = self.alias_map[alias].demote()
974 parent_alias = self.alias_map[alias].parent_alias
975 if self.alias_map[parent_alias].join_type == INNER:
976 aliases.append(parent_alias)
977
978 def reset_refcounts(self, to_counts):
979 """
980 Reset reference counts for aliases so that they match the value passed
981 in `to_counts`.
982 """
983 for alias, cur_refcount in self.alias_refcount.copy().items():
984 unref_amount = cur_refcount - to_counts.get(alias, 0)
985 self.unref_alias(alias, unref_amount)
986
    def change_aliases(self, change_map):
        """
        Change the aliases in change_map (which maps old-alias -> new-alias),
        relabelling any references to them in select columns and the where
        clause.

        Return self when change_map is empty; otherwise mutate the query in
        place and return None.
        """
        if not change_map:
            return self
        # If keys and values of change_map were to intersect, an alias might be
        # updated twice (e.g. T4 -> T5, T5 -> T6, so also T4 -> T6) depending
        # on their order in change_map.
        assert set(change_map).isdisjoint(change_map.values())

        # 1. Update references in "select" (normal columns plus aliases),
        # "group by" and "where".
        self.where.relabel_aliases(change_map)
        if isinstance(self.group_by, tuple):
            self.group_by = tuple(
                [col.relabeled_clone(change_map) for col in self.group_by]
            )
        self.select = tuple([col.relabeled_clone(change_map) for col in self.select])
        # Annotations may be empty/None; only rebuild them when present.
        self.annotations = self.annotations and {
            key: col.relabeled_clone(change_map)
            for key, col in self.annotations.items()
        }

        # 2. Rename the alias in the internal table/alias datastructures.
        for old_alias, new_alias in change_map.items():
            if old_alias not in self.alias_map:
                continue
            alias_data = self.alias_map[old_alias].relabeled_clone(change_map)
            self.alias_map[new_alias] = alias_data
            self.alias_refcount[new_alias] = self.alias_refcount[old_alias]
            del self.alias_refcount[old_alias]
            del self.alias_map[old_alias]

            # Keep table_map's alias list in sync with the rename.
            table_aliases = self.table_map[alias_data.table_name]
            for pos, alias in enumerate(table_aliases):
                if alias == old_alias:
                    table_aliases[pos] = new_alias
                    break

        # 3. Rename the direct external aliases and the ones of combined
        # queries (union, intersection, difference).
        self.external_aliases = {
            # Table is aliased or it's being changed and thus is aliased.
            change_map.get(alias, alias): (aliased or alias in change_map)
            for alias, aliased in self.external_aliases.items()
        }
        for combined_query in self.combined_queries:
            # Only propagate renames for aliases the combined query refers to.
            external_change_map = {
                alias: aliased
                for alias, aliased in change_map.items()
                if alias in combined_query.external_aliases
            }
            combined_query.change_aliases(external_change_map)
1043
    def bump_prefix(self, other_query, exclude=None):
        """
        Change the alias prefix to the next letter in the alphabet in a way
        that the other query's aliases and this query's aliases will not
        conflict. Even tables that previously had no alias will get an alias
        after this call. To prevent changing aliases use the exclude parameter.
        """

        def prefix_gen():
            """
            Generate a sequence of characters in alphabetical order:
            -> 'A', 'B', 'C', ...

            When the alphabet is finished, the sequence will continue with the
            Cartesian product:
            -> 'AA', 'AB', 'AC', ...
            """
            alphabet = ascii_uppercase
            # Start just past the current prefix letter.
            prefix = chr(ord(self.alias_prefix) + 1)
            yield prefix
            for n in count(1):
                seq = alphabet[alphabet.index(prefix) :] if prefix else alphabet
                for s in product(seq, repeat=n):
                    yield "".join(s)
                prefix = None

        if self.alias_prefix != other_query.alias_prefix:
            # No clashes between self and outer query should be possible.
            return

        # Explicitly avoid infinite loop. The constant divider is based on how
        # much depth recursive subquery references add to the stack. This value
        # might need to be adjusted when adding or removing function calls from
        # the code path in charge of performing these operations.
        local_recursion_limit = sys.getrecursionlimit() // 16
        for pos, prefix in enumerate(prefix_gen()):
            if prefix not in self.subq_aliases:
                # Found a prefix not used by any subquery seen so far.
                self.alias_prefix = prefix
                break
            if pos > local_recursion_limit:
                raise RecursionError(
                    "Maximum recursion depth exceeded: too many subqueries."
                )
        # Both queries track the full set of subquery prefixes in use.
        self.subq_aliases = self.subq_aliases.union([self.alias_prefix])
        other_query.subq_aliases = other_query.subq_aliases.union(self.subq_aliases)
        if exclude is None:
            exclude = {}
        # Rename every alias (except excluded ones) to "<prefix><n>".
        self.change_aliases(
            {
                alias: "%s%d" % (self.alias_prefix, pos)
                for pos, alias in enumerate(self.alias_map)
                if alias not in exclude
            }
        )
1098
1099 def get_initial_alias(self):
1100 """
1101 Return the first alias for this query, after increasing its reference
1102 count.
1103 """
1104 if self.alias_map:
1105 alias = self.base_table
1106 self.ref_alias(alias)
1107 elif self.model:
1108 alias = self.join(self.base_table_class(self.get_meta().db_table, None))
1109 else:
1110 alias = None
1111 return alias
1112
1113 def count_active_tables(self):
1114 """
1115 Return the number of tables in this query with a non-zero reference
1116 count. After execution, the reference counts are zeroed, so tables
1117 added in compiler will not be seen by this method.
1118 """
1119 return len([1 for count in self.alias_refcount.values() if count])
1120
    def join(self, join, reuse=None):
        """
        Return an alias for the 'join', either reusing an existing alias for
        that join or creating a new one. 'join' is either a base_table_class or
        join_class.

        The 'reuse' parameter can be either None which means all joins are
        reusable, or it can be a set containing the aliases that can be reused.

        A join is always created as LOUTER if the lhs alias is LOUTER to make
        sure chains like t1 LOUTER t2 INNER t3 aren't generated. All new
        joins are created as LOUTER if the join is nullable.
        """
        # Candidate aliases: existing equal joins whose alias is allowed by
        # the 'reuse' set.
        reuse_aliases = [
            a
            for a, j in self.alias_map.items()
            if (reuse is None or a in reuse) and j == join
        ]
        if reuse_aliases:
            if join.table_alias in reuse_aliases:
                reuse_alias = join.table_alias
            else:
                # Reuse the most recent alias of the joined table
                # (a many-to-many relation may be joined multiple times).
                reuse_alias = reuse_aliases[-1]
            self.ref_alias(reuse_alias)
            return reuse_alias

        # No reuse is possible, so we need a new alias.
        alias, _ = self.table_alias(
            join.table_name, create=True, filtered_relation=join.filtered_relation
        )
        if join.join_type:
            # Inherit LOUTER from the parent join (avoids LOUTER-then-INNER
            # chains); nullable joins also become LOUTER.
            if self.alias_map[join.parent_alias].join_type == LOUTER or join.nullable:
                join_type = LOUTER
            else:
                join_type = INNER
            join.join_type = join_type
        join.table_alias = alias
        self.alias_map[alias] = join
        if filtered_relation := join.filtered_relation:
            resolve_reuse = reuse
            if resolve_reuse is not None:
                resolve_reuse = set(reuse) | {alias}
            joins_len = len(self.alias_map)
            join.filtered_relation = filtered_relation.resolve_expression(
                self, reuse=resolve_reuse
            )
            # Some joins were created during expression resolving; they must
            # come before the one just added, so move it to the end of
            # alias_map by popping and re-inserting it.
            if joins_len < len(self.alias_map):
                self.alias_map[alias] = self.alias_map.pop(alias)
        return alias
1174
    def join_parent_model(self, opts, model, alias, seen):
        """
        Make sure the given 'model' is joined in the query. If 'model' isn't
        a parent of 'opts' or if it is None this method is a no-op.

        The 'alias' is the root alias for starting the join, 'seen' is a dict
        of model -> alias of existing joins. It must also contain a mapping
        of None -> some alias. This will be returned in the no-op case.
        """
        if model in seen:
            return seen[model]
        chain = opts.get_base_chain(model)
        if not chain:
            return alias
        curr_opts = opts
        for int_model in chain:
            if int_model in seen:
                # Already joined; continue from its alias and options.
                curr_opts = int_model._meta
                alias = seen[int_model]
                continue
            # Proxy models have elements in their base chain
            # with no parents; assign the new options
            # object and skip to the next base in that
            # case.
            if not curr_opts.parents[int_model]:
                curr_opts = int_model._meta
                continue
            # Join via the automatic parent link field.
            link_field = curr_opts.get_ancestor_link(int_model)
            join_info = self.setup_joins([link_field.name], curr_opts, alias)
            curr_opts = int_model._meta
            alias = seen[int_model] = join_info.joins[-1]
        return alias or seen[None]
1207
1208 def check_alias(self, alias):
1209 if FORBIDDEN_ALIAS_PATTERN.search(alias):
1210 raise ValueError(
1211 "Column aliases cannot contain whitespace characters, quotation marks, "
1212 "semicolons, or SQL comments."
1213 )
1214
1215 def add_annotation(self, annotation, alias, select=True):
1216 """Add a single annotation expression to the Query."""
1217 self.check_alias(alias)
1218 annotation = annotation.resolve_expression(self, allow_joins=True, reuse=None)
1219 if select:
1220 self.append_annotation_mask([alias])
1221 else:
1222 self.set_annotation_mask(set(self.annotation_select).difference({alias}))
1223 self.annotations[alias] = annotation
1224 if self.selected:
1225 self.selected[alias] = alias
1226
    def resolve_expression(self, query, *args, **kwargs):
        """
        Resolve this query for use as a subquery expression of the outer
        'query', returning a resolved clone.
        """
        clone = self.clone()
        # Subqueries need to use a different set of aliases than the outer query.
        clone.bump_prefix(query)
        clone.subquery = True
        clone.where.resolve_expression(query, *args, **kwargs)
        # Resolve combined queries.
        if clone.combinator:
            clone.combined_queries = tuple(
                [
                    combined_query.resolve_expression(query, *args, **kwargs)
                    for combined_query in clone.combined_queries
                ]
            )
        for key, value in clone.annotations.items():
            resolved = value.resolve_expression(query, *args, **kwargs)
            if hasattr(resolved, "external_aliases"):
                # Propagate this query's external references into the
                # resolved annotation.
                resolved.external_aliases.update(clone.external_aliases)
            clone.annotations[key] = resolved
        # Outer query's aliases are considered external.
        for alias, table in query.alias_map.items():
            clone.external_aliases[alias] = (
                isinstance(table, Join)
                and table.join_field.related_model._meta.db_table != alias
            ) or (
                isinstance(table, BaseTable) and table.table_name != table.table_alias
            )
        return clone
1255
1256 def get_external_cols(self):
1257 exprs = chain(self.annotations.values(), self.where.children)
1258 return [
1259 col
1260 for col in self._gen_cols(exprs, include_external=True)
1261 if col.alias in self.external_aliases
1262 ]
1263
1264 def get_group_by_cols(self, wrapper=None):
1265 # If wrapper is referenced by an alias for an explicit GROUP BY through
1266 # values() a reference to this expression and not the self must be
1267 # returned to ensure external column references are not grouped against
1268 # as well.
1269 external_cols = self.get_external_cols()
1270 if any(col.possibly_multivalued for col in external_cols):
1271 return [wrapper or self]
1272 return external_cols
1273
1274 def as_sql(self, compiler, connection):
1275 # Some backends (e.g. Oracle) raise an error when a subquery contains
1276 # unnecessary ORDER BY clause.
1277 if (
1278 self.subquery
1279 and not connection.features.ignores_unnecessary_order_by_in_subqueries
1280 ):
1281 self.clear_ordering(force=False)
1282 for query in self.combined_queries:
1283 query.clear_ordering(force=False)
1284 sql, params = self.get_compiler(connection=connection).as_sql()
1285 if self.subquery:
1286 sql = "(%s)" % sql
1287 return sql, params
1288
1289 def resolve_lookup_value(self, value, can_reuse, allow_joins, summarize=False):
1290 if hasattr(value, "resolve_expression"):
1291 value = value.resolve_expression(
1292 self,
1293 reuse=can_reuse,
1294 allow_joins=allow_joins,
1295 summarize=summarize,
1296 )
1297 elif isinstance(value, (list, tuple)):
1298 # The items of the iterable may be expressions and therefore need
1299 # to be resolved independently.
1300 values = (
1301 self.resolve_lookup_value(sub_value, can_reuse, allow_joins, summarize)
1302 for sub_value in value
1303 )
1304 type_ = type(value)
1305 if hasattr(type_, "_make"): # namedtuple
1306 return type_(*values)
1307 return type_(values)
1308 return value
1309
1310 def solve_lookup_type(self, lookup, summarize=False):
1311 """
1312 Solve the lookup type from the lookup (e.g.: 'foobar__id__icontains').
1313 """
1314 lookup_splitted = lookup.split(LOOKUP_SEP)
1315 if self.annotations:
1316 annotation, expression_lookups = refs_expression(
1317 lookup_splitted, self.annotations
1318 )
1319 if annotation:
1320 expression = self.annotations[annotation]
1321 if summarize:
1322 expression = Ref(annotation, expression)
1323 return expression_lookups, (), expression
1324 _, field, _, lookup_parts = self.names_to_path(lookup_splitted, self.get_meta())
1325 field_parts = lookup_splitted[0 : len(lookup_splitted) - len(lookup_parts)]
1326 if len(lookup_parts) > 1 and not field_parts:
1327 raise FieldError(
1328 'Invalid lookup "%s" for model %s".'
1329 % (lookup, self.get_meta().model.__name__)
1330 )
1331 return lookup_parts, field_parts, False
1332
1333 def check_query_object_type(self, value, opts, field):
1334 """
1335 Check whether the object passed while querying is of the correct type.
1336 If not, raise a ValueError specifying the wrong object.
1337 """
1338 if hasattr(value, "_meta"):
1339 if not check_rel_lookup_compatibility(value._meta.model, opts, field):
1340 raise ValueError(
1341 'Cannot query "%s": Must be "%s" instance.'
1342 % (value, opts.object_name)
1343 )
1344
1345 def check_related_objects(self, field, value, opts):
1346 """Check the type of object passed to query relations."""
1347 if field.is_relation:
1348 # Check that the field and the queryset use the same model in a
1349 # query like .filter(author=Author.objects.all()). For example, the
1350 # opts would be Author's (from the author field) and value.model
1351 # would be Author.objects.all() queryset's .model (Author also).
1352 # The field is the related field on the lhs side.
1353 if (
1354 isinstance(value, Query)
1355 and not value.has_select_fields
1356 and not check_rel_lookup_compatibility(value.model, opts, field)
1357 ):
1358 raise ValueError(
1359 'Cannot use QuerySet for "%s": Use a QuerySet for "%s".'
1360 % (value.model._meta.object_name, opts.object_name)
1361 )
1362 elif hasattr(value, "_meta"):
1363 self.check_query_object_type(value, opts, field)
1364 elif hasattr(value, "__iter__"):
1365 for v in value:
1366 self.check_query_object_type(v, opts, field)
1367
1368 def check_filterable(self, expression):
1369 """Raise an error if expression cannot be used in a WHERE clause."""
1370 if hasattr(expression, "resolve_expression") and not getattr(
1371 expression, "filterable", True
1372 ):
1373 raise NotSupportedError(
1374 expression.__class__.__name__ + " is disallowed in the filter "
1375 "clause."
1376 )
1377 if hasattr(expression, "get_source_expressions"):
1378 for expr in expression.get_source_expressions():
1379 self.check_filterable(expr)
1380
    def build_lookup(self, lookups, lhs, rhs):
        """
        Try to extract transforms and lookup from given lhs.

        The lhs value is something that works like SQLExpression.
        The rhs value is what the lookup is going to compare against.
        The lookups is a list of names to extract using get_lookup()
        and get_transform().

        Return a Lookup instance, or None when the final name resolves to
        neither a lookup nor a transform with an "exact" lookup.
        """
        # __exact is the default lookup if one isn't given.
        *transforms, lookup_name = lookups or ["exact"]
        for name in transforms:
            lhs = self.try_transform(lhs, name, lookups)
        # First try get_lookup() so that the lookup takes precedence if the lhs
        # supports both transform and lookup for the name.
        lookup_class = lhs.get_lookup(lookup_name)
        if not lookup_class:
            # A lookup wasn't found. Try to interpret the name as a transform
            # and do an Exact lookup against it.
            lhs = self.try_transform(lhs, lookup_name)
            lookup_name = "exact"
            lookup_class = lhs.get_lookup(lookup_name)
            if not lookup_class:
                return

        lookup = lookup_class(lhs, rhs)
        # Interpret '__exact=None' as the sql 'is NULL'; otherwise, reject all
        # uses of None as a query value unless the lookup supports it.
        if lookup.rhs is None and not lookup.can_use_none_as_rhs:
            if lookup_name not in ("exact", "iexact"):
                raise ValueError("Cannot use None as a query value")
            return lhs.get_lookup("isnull")(lhs, True)

        # For Oracle '' is equivalent to null. The check must be done at this
        # stage because join promotion can't be done in the compiler. Using
        # DEFAULT_DB_ALIAS isn't nice but it's the best that can be done here.
        # A similar thing is done in is_nullable(), too.
        if (
            lookup_name == "exact"
            and lookup.rhs == ""
            and connections[DEFAULT_DB_ALIAS].features.interprets_empty_strings_as_nulls
        ):
            return lhs.get_lookup("isnull")(lhs, True)

        return lookup
1426
1427 def try_transform(self, lhs, name, lookups=None):
1428 """
1429 Helper method for build_lookup(). Try to fetch and initialize
1430 a transform for name parameter from lhs.
1431 """
1432 transform_class = lhs.get_transform(name)
1433 if transform_class:
1434 return transform_class(lhs)
1435 else:
1436 output_field = lhs.output_field.__class__
1437 suggested_lookups = difflib.get_close_matches(
1438 name, lhs.output_field.get_lookups()
1439 )
1440 if suggested_lookups:
1441 suggestion = ", perhaps you meant %s?" % " or ".join(suggested_lookups)
1442 else:
1443 suggestion = "."
1444 if lookups is not None:
1445 name_index = lookups.index(name)
1446 unsupported_lookup = LOOKUP_SEP.join(lookups[name_index:])
1447 else:
1448 unsupported_lookup = name
1449 raise FieldError(
1450 "Unsupported lookup '%s' for %s or join on the field not "
1451 "permitted%s" % (unsupported_lookup, output_field.__name__, suggestion)
1452 )
1453
    def build_filter(
        self,
        filter_expr,
        branch_negated=False,
        current_negated=False,
        can_reuse=None,
        allow_joins=True,
        split_subq=True,
        check_filterable=True,
        summarize=False,
        update_join_types=True,
    ):
        """
        Build a WhereNode for a single filter clause but don't add it
        to this Query. Query.add_q() will then add this filter to the where
        Node.

        The 'branch_negated' tells us if the current branch contains any
        negations. This will be used to determine if subqueries are needed.

        The 'current_negated' is used to determine if the current filter is
        negated or not and this will be used to determine if IS NULL filtering
        is needed.

        The difference between current_negated and branch_negated is that
        branch_negated is set on first negation, but current_negated is
        flipped for each negation.

        Note that add_filter will not do any negating itself, that is done
        upper in the code by add_q().

        The 'can_reuse' is a set of reusable joins for multijoins.

        The method will create a filter clause that can be added to the current
        query. However, if the filter isn't added to the query then the caller
        is responsible for unreffing the joins used.

        Return a (WhereNode, used_joins) tuple; used_joins is empty when an
        outer join is required to preserve the filter's semantics.
        """
        if isinstance(filter_expr, dict):
            raise FieldError("Cannot parse keyword query as dict")
        # Q objects are handled recursively by _add_q().
        if isinstance(filter_expr, Q):
            return self._add_q(
                filter_expr,
                branch_negated=branch_negated,
                current_negated=current_negated,
                used_aliases=can_reuse,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
                update_join_types=update_join_types,
            )
        # Conditional expressions (e.g. Exists) are resolved and wrapped in
        # an exact-True lookup when they aren't already a Lookup.
        if hasattr(filter_expr, "resolve_expression"):
            if not getattr(filter_expr, "conditional", False):
                raise TypeError("Cannot filter against a non-conditional expression.")
            condition = filter_expr.resolve_expression(
                self, allow_joins=allow_joins, reuse=can_reuse, summarize=summarize
            )
            if not isinstance(condition, Lookup):
                condition = self.build_lookup(["exact"], condition, True)
            return WhereNode([condition], connector=AND), []
        # Plain (lookup_string, value) pair from here on.
        arg, value = filter_expr
        if not arg:
            raise FieldError("Cannot parse keyword query %r" % arg)
        lookups, parts, reffed_expression = self.solve_lookup_type(arg, summarize)

        if check_filterable:
            self.check_filterable(reffed_expression)

        if not allow_joins and len(parts) > 1:
            raise FieldError("Joined field references are not permitted in this query")

        # Track which joins resolving the value adds, by diffing refcounts.
        pre_joins = self.alias_refcount.copy()
        value = self.resolve_lookup_value(value, can_reuse, allow_joins, summarize)
        used_joins = {
            k for k, v in self.alias_refcount.items() if v > pre_joins.get(k, 0)
        }

        if check_filterable:
            self.check_filterable(value)

        # Lookups against an annotation don't need any joins of their own.
        if reffed_expression:
            condition = self.build_lookup(lookups, reffed_expression, value)
            return WhereNode([condition], connector=AND), []

        opts = self.get_meta()
        alias = self.get_initial_alias()
        allow_many = not branch_negated or not split_subq

        try:
            join_info = self.setup_joins(
                parts,
                opts,
                alias,
                can_reuse=can_reuse,
                allow_many=allow_many,
            )

            # Prevent iterator from being consumed by check_related_objects()
            if isinstance(value, Iterator):
                value = list(value)
            self.check_related_objects(join_info.final_field, value, join_info.opts)

            # split_exclude() needs to know which joins were generated for the
            # lookup parts
            self._lookup_joins = join_info.joins
        except MultiJoin as e:
            # A disallowed multi-valued join: rewrite as a subquery exclude.
            return self.split_exclude(filter_expr, can_reuse, e.names_with_path)

        # Update used_joins before trimming since they are reused to determine
        # which joins could be later promoted to INNER.
        used_joins.update(join_info.joins)
        targets, alias, join_list = self.trim_joins(
            join_info.targets, join_info.joins, join_info.path
        )
        if can_reuse is not None:
            can_reuse.update(join_list)

        if join_info.final_field.is_relation:
            if len(targets) == 1:
                col = self._get_col(targets[0], join_info.final_field, alias)
            else:
                # Composite (multi-column) relation targets.
                col = ColPairs(alias, targets, join_info.targets, join_info.final_field)
        else:
            col = self._get_col(targets[0], join_info.final_field, alias)

        condition = self.build_lookup(lookups, col, value)
        lookup_type = condition.lookup_name
        clause = WhereNode([condition], connector=AND)

        # An isnull=True filter must not be collapsed by an INNER join.
        require_outer = (
            lookup_type == "isnull" and condition.rhs is True and not current_negated
        )
        if (
            current_negated
            and (lookup_type != "isnull" or condition.rhs is False)
            and condition.rhs is not None
        ):
            require_outer = True
            if lookup_type != "isnull":
                # The condition added here will be SQL like this:
                # NOT (col IS NOT NULL), where the first NOT is added in
                # upper layers of code. The reason for addition is that if col
                # is null, then col != someval will result in SQL "unknown"
                # which isn't the same as in Python. The Python None handling
                # is wanted, and it can be gotten by
                # (col IS NULL OR col != someval)
                # <=>
                # NOT (col IS NOT NULL AND col = someval).
                if (
                    self.is_nullable(targets[0])
                    or self.alias_map[join_list[-1]].join_type == LOUTER
                ):
                    lookup_class = targets[0].get_lookup("isnull")
                    col = self._get_col(targets[0], join_info.targets[0], alias)
                    clause.add(lookup_class(col, False), AND)
                # If someval is a nullable column, someval IS NOT NULL is
                # added.
                if isinstance(value, Col) and self.is_nullable(value.target):
                    lookup_class = value.target.get_lookup("isnull")
                    clause.add(lookup_class(value, False), AND)
        return clause, used_joins if not require_outer else ()
1615
1616 def add_filter(self, filter_lhs, filter_rhs):
1617 self.add_q(Q((filter_lhs, filter_rhs)))
1618
1619 def add_q(self, q_object):
1620 """
1621 A preprocessor for the internal _add_q(). Responsible for doing final
1622 join promotion.
1623 """
1624 # For join promotion this case is doing an AND for the added q_object
1625 # and existing conditions. So, any existing inner join forces the join
1626 # type to remain inner. Existing outer joins can however be demoted.
1627 # (Consider case where rel_a is LOUTER and rel_a__col=1 is added - if
1628 # rel_a doesn't produce any rows, then the whole condition must fail.
1629 # So, demotion is OK.
1630 existing_inner = {
1631 a for a in self.alias_map if self.alias_map[a].join_type == INNER
1632 }
1633 clause, _ = self._add_q(q_object, self.used_aliases)
1634 if clause:
1635 self.where.add(clause, AND)
1636 self.demote_joins(existing_inner)
1637
1638 def build_where(self, filter_expr):
1639 return self.build_filter(filter_expr, allow_joins=False)[0]
1640
1641 def clear_where(self):
1642 self.where = WhereNode()
1643
    def _add_q(
        self,
        q_object,
        used_aliases,
        branch_negated=False,
        current_negated=False,
        allow_joins=True,
        split_subq=True,
        check_filterable=True,
        summarize=False,
        update_join_types=True,
    ):
        """
        Add a Q-object to the current filter.

        Return a (WhereNode, needed_inner) tuple; needed_inner comes from
        JoinPromoter.update_join_types() and is empty when update_join_types
        is False.
        """
        connector = q_object.connector
        # current_negated flips on every negation; branch_negated latches on
        # the first one (see build_filter's docstring).
        current_negated ^= q_object.negated
        branch_negated = branch_negated or q_object.negated
        target_clause = WhereNode(connector=connector, negated=q_object.negated)
        joinpromoter = JoinPromoter(
            q_object.connector, len(q_object.children), current_negated
        )
        for child in q_object.children:
            # Each child may itself be a Q, an expression, or a (name, value)
            # pair; build_filter() dispatches on the type.
            child_clause, needed_inner = self.build_filter(
                child,
                can_reuse=used_aliases,
                branch_negated=branch_negated,
                current_negated=current_negated,
                allow_joins=allow_joins,
                split_subq=split_subq,
                check_filterable=check_filterable,
                summarize=summarize,
                update_join_types=update_join_types,
            )
            joinpromoter.add_votes(needed_inner)
            if child_clause:
                target_clause.add(child_clause, connector)
        if update_join_types:
            needed_inner = joinpromoter.update_join_types(self)
        else:
            needed_inner = []
        return target_clause, needed_inner
1684
    def add_filtered_relation(self, filtered_relation, alias):
        """
        Register a FilteredRelation under 'alias' after validating that its
        relation_name contains no lookups and that its condition only
        references the relation itself or relations at most one level deeper.
        """
        filtered_relation.alias = alias
        relation_lookup_parts, relation_field_parts, _ = self.solve_lookup_type(
            filtered_relation.relation_name
        )
        if relation_lookup_parts:
            raise ValueError(
                "FilteredRelation's relation_name cannot contain lookups "
                "(got %r)." % filtered_relation.relation_name
            )
        for lookup in get_children_from_q(filtered_relation.condition):
            lookup_parts, lookup_field_parts, _ = self.solve_lookup_type(lookup)
            # Strip the final lookup (and the implied "exact" when there is
            # no explicit lookup) to get the pure field path.
            shift = 2 if not lookup_parts else 1
            lookup_field_path = lookup_field_parts[:-shift]
            for idx, lookup_field_part in enumerate(lookup_field_path):
                # Every leading path segment must match the relation_name.
                if len(relation_field_parts) > idx:
                    if relation_field_parts[idx] != lookup_field_part:
                        raise ValueError(
                            "FilteredRelation's condition doesn't support "
                            "relations outside the %r (got %r)."
                            % (filtered_relation.relation_name, lookup)
                        )
            if len(lookup_field_parts) > len(relation_field_parts) + 1:
                raise ValueError(
                    "FilteredRelation's condition doesn't support nested "
                    "relations deeper than the relation_name (got %r for "
                    "%r)." % (lookup, filtered_relation.relation_name)
                )
        # Rewrite condition references from the relation name to the alias.
        filtered_relation.condition = rename_prefix_from_q(
            filtered_relation.relation_name,
            alias,
            filtered_relation.condition,
        )
        self._filtered_relations[filtered_relation.alias] = filtered_relation
1719
    def names_to_path(self, names, opts, allow_many=True, fail_on_missing=False):
        """
        Walk the list of names and turn them into PathInfo tuples. A single
        name in 'names' can generate multiple PathInfos (m2m, for example).

        'names' is the path of names to travel, 'opts' is the model Options we
        start the name resolving from, 'allow_many' is as for setup_joins().
        If fail_on_missing is set to True, then a name that can't be resolved
        will generate a FieldError.

        Return a list of PathInfo tuples. In addition return the final field
        (the last used join field) and target (which is a field guaranteed to
        contain the same value as the final field). Finally, return those names
        that weren't found (which are likely transforms and the final lookup).
        """
        path, names_with_path = [], []
        for pos, name in enumerate(names):
            cur_names_with_path = (name, [])
            # "pk" is an alias for the model's primary key field.
            if name == "pk" and opts is not None:
                name = opts.pk.name

            field = None
            filtered_relation = None
            try:
                if opts is None:
                    raise FieldDoesNotExist
                field = opts.get_field(name)
            except FieldDoesNotExist:
                # Not a model field: try annotations, then filtered relations
                # (the latter only allowed as the first path component).
                if name in self.annotation_select:
                    field = self.annotation_select[name].output_field
                elif name in self._filtered_relations and pos == 0:
                    filtered_relation = self._filtered_relations[name]
                    if LOOKUP_SEP in filtered_relation.relation_name:
                        parts = filtered_relation.relation_name.split(LOOKUP_SEP)
                        filtered_relation_path, field, _, _ = self.names_to_path(
                            parts,
                            opts,
                            allow_many,
                            fail_on_missing,
                        )
                        # The last entry is re-derived below from `field` (with
                        # the filtered relation applied), so drop it here.
                        path.extend(filtered_relation_path[:-1])
                    else:
                        field = opts.get_field(filtered_relation.relation_name)
            if field is not None:
                # Fields that contain one-to-many relations with a generic
                # model (like a GenericForeignKey) cannot generate reverse
                # relations and therefore cannot be used for reverse querying.
                if field.is_relation and not field.related_model:
                    raise FieldError(
                        "Field %r does not generate an automatic reverse "
                        "relation and therefore cannot be used for reverse "
                        "querying. If it is a GenericForeignKey, consider "
                        "adding a GenericRelation." % name
                    )
                try:
                    model = field.model._meta.concrete_model
                except AttributeError:
                    # QuerySet.annotate() may introduce fields that aren't
                    # attached to a model.
                    model = None
            else:
                # We didn't find the current field, so move position back
                # one step.
                pos -= 1
                if pos == -1 or fail_on_missing:
                    available = sorted(
                        [
                            *get_field_names_from_opts(opts),
                            *self.annotation_select,
                            *self._filtered_relations,
                        ]
                    )
                    raise FieldError(
                        "Cannot resolve keyword '%s' into field. "
                        "Choices are: %s" % (name, ", ".join(available))
                    )
                break
            # Check if we need any joins for concrete inheritance cases (the
            # field lives in parent, but we are currently in one of its
            # children)
            if opts is not None and model is not opts.model:
                path_to_parent = opts.get_path_to_parent(model)
                if path_to_parent:
                    path.extend(path_to_parent)
                    cur_names_with_path[1].extend(path_to_parent)
                    opts = path_to_parent[-1].to_opts
            if hasattr(field, "path_infos"):
                # Relational field: one name may expand to several PathInfos
                # (an m2m goes through its intermediary model, for example).
                if filtered_relation:
                    pathinfos = field.get_path_info(filtered_relation)
                else:
                    pathinfos = field.path_infos
                if not allow_many:
                    for inner_pos, p in enumerate(pathinfos):
                        if p.m2m:
                            cur_names_with_path[1].extend(pathinfos[0 : inner_pos + 1])
                            names_with_path.append(cur_names_with_path)
                            raise MultiJoin(pos + 1, names_with_path)
                last = pathinfos[-1]
                path.extend(pathinfos)
                final_field = last.join_field
                opts = last.to_opts
                targets = last.target_fields
                cur_names_with_path[1].extend(pathinfos)
                names_with_path.append(cur_names_with_path)
            else:
                # Local non-relational field.
                final_field = field
                targets = (field,)
                if fail_on_missing and pos + 1 != len(names):
                    raise FieldError(
                        "Cannot resolve keyword %r into field. Join on '%s'"
                        " not permitted." % (names[pos + 1], name)
                    )
                break
        return path, final_field, targets, names[pos + 1 :]
1835
    def setup_joins(
        self,
        names,
        opts,
        alias,
        can_reuse=None,
        allow_many=True,
    ):
        """
        Compute the necessary table joins for the passage through the fields
        given in 'names'. 'opts' is the Options class for the current model
        (which gives the table we are starting from), 'alias' is the alias for
        the table to start the joining from.

        The 'can_reuse' defines the reverse foreign key joins we can reuse. It
        can be None in which case all joins are reusable or a set of aliases
        that can be reused. Note that non-reverse foreign keys are always
        reusable when using setup_joins().

        If 'allow_many' is False, then any reverse foreign key seen will
        generate a MultiJoin exception.

        Return the final field involved in the joins, the target field (used
        for any 'where' constraint), the final 'opts' value, the joins, the
        field path traveled to generate the joins, and a transform function
        that takes a field and alias and is equivalent to `field.get_col(alias)`
        in the simple case but wraps field transforms if they were included in
        names.

        The target field is the field containing the concrete value. Final
        field can be something different, for example foreign key pointing to
        that value. Final field is needed for example in some value
        conversions (convert 'obj' in fk__id=obj to pk val using the foreign
        key field for example).
        """
        joins = [alias]
        # The transform can't be applied yet, as joins must be trimmed later.
        # To avoid making every caller of this method look up transforms
        # directly, compute transforms here and create a partial that converts
        # fields to the appropriate wrapped version.

        def final_transformer(field, alias):
            if not self.alias_cols:
                alias = None
            return field.get_col(alias)

        # Try resolving all the names as fields first. If there's an error,
        # treat trailing names as lookups until a field can be resolved.
        last_field_exception = None
        for pivot in range(len(names), 0, -1):
            try:
                path, final_field, targets, rest = self.names_to_path(
                    names[:pivot],
                    opts,
                    allow_many,
                    fail_on_missing=True,
                )
            except FieldError as exc:
                if pivot == 1:
                    # The first item cannot be a lookup, so it's safe
                    # to raise the field error here.
                    raise
                else:
                    last_field_exception = exc
            else:
                # The transforms are the remaining items that couldn't be
                # resolved into fields.
                transforms = names[pivot:]
                break
        for name in transforms:
            # `name`/`previous` are bound through keyword-only defaults and
            # functools.partial so each wrapper captures its own values
            # (avoiding the late-binding closure pitfall).

            def transform(field, alias, *, name, previous):
                try:
                    wrapped = previous(field, alias)
                    return self.try_transform(wrapped, name)
                except FieldError:
                    # FieldError is raised if the transform doesn't exist.
                    if isinstance(final_field, Field) and last_field_exception:
                        raise last_field_exception
                    else:
                        raise

            final_transformer = functools.partial(
                transform, name=name, previous=final_transformer
            )
            final_transformer.has_transforms = True
        # Then, add the path to the query's joins. Note that we can't trim
        # joins at this stage - we will need the information about join type
        # of the trimmed joins.
        for join in path:
            if join.filtered_relation:
                filtered_relation = join.filtered_relation.clone()
                table_alias = filtered_relation.alias
            else:
                filtered_relation = None
                table_alias = None
            opts = join.to_opts
            if join.direct:
                nullable = self.is_nullable(join.join_field)
            else:
                nullable = True
            # Joins start as INNER; join promotion may later turn them into
            # LEFT OUTER joins.
            connection = self.join_class(
                opts.db_table,
                alias,
                table_alias,
                INNER,
                join.join_field,
                nullable,
                filtered_relation=filtered_relation,
            )
            reuse = can_reuse if join.m2m else None
            alias = self.join(connection, reuse=reuse)
            joins.append(alias)
        return JoinInfo(final_field, targets, opts, joins, path, final_transformer)
1950
    def trim_joins(self, targets, joins, path):
        """
        The 'target' parameter is the final field being joined to, 'joins'
        is the full list of join aliases. The 'path' contain the PathInfos
        used to create the joins.

        Return the final target field and table alias and the new active
        joins.

        Always trim any direct join if the target column is already in the
        previous table. Can't trim reverse joins as it's unknown if there's
        anything on the other side of the join.
        """
        joins = joins[:]
        # Walk the path from the end; each trimmed PathInfo removes the last
        # alias in `joins`.
        for pos, info in enumerate(reversed(path)):
            if len(joins) == 1 or not info.direct:
                break
            if info.filtered_relation:
                # Trimming would lose the relation's filter condition.
                break
            join_targets = {t.column for t in info.join_field.foreign_related_fields}
            cur_targets = {t.column for t in targets}
            if not cur_targets.issubset(join_targets):
                break
            # Remap each target onto the equivalent column on the left-hand
            # side of the join being trimmed.
            targets_dict = {
                r[1].column: r[0]
                for r in info.join_field.related_fields
                if r[1].column in cur_targets
            }
            targets = tuple(targets_dict[t.column] for t in targets)
            self.unref_alias(joins.pop())
        return targets, joins[-1], joins
1982
1983 @classmethod
1984 def _gen_cols(cls, exprs, include_external=False, resolve_refs=True):
1985 for expr in exprs:
1986 if isinstance(expr, Col):
1987 yield expr
1988 elif include_external and callable(
1989 getattr(expr, "get_external_cols", None)
1990 ):
1991 yield from expr.get_external_cols()
1992 elif hasattr(expr, "get_source_expressions"):
1993 if not resolve_refs and isinstance(expr, Ref):
1994 continue
1995 yield from cls._gen_cols(
1996 expr.get_source_expressions(),
1997 include_external=include_external,
1998 resolve_refs=resolve_refs,
1999 )
2000
2001 @classmethod
2002 def _gen_col_aliases(cls, exprs):
2003 yield from (expr.alias for expr in cls._gen_cols(exprs))
2004
    def resolve_ref(self, name, allow_joins=True, reuse=None, summarize=False):
        """
        Resolve `name` (an annotation name or a field lookup path) into an
        expression usable in this query. Raise FieldError if resolution would
        require a join while `allow_joins` is False. `reuse`, when given, is a
        set of reusable join aliases that is updated in place with the joins
        used. `summarize` indicates an aggregate() over existing annotations.
        """
        annotation = self.annotations.get(name)
        if annotation is not None:
            if not allow_joins:
                # The annotation itself may reference joined columns.
                for alias in self._gen_col_aliases([annotation]):
                    if isinstance(self.alias_map[alias], Join):
                        raise FieldError(
                            "Joined field references are not permitted in this query"
                        )
            if summarize:
                # Summarize currently means we are doing an aggregate() query
                # which is executed as a wrapped subquery if any of the
                # aggregate() elements reference an existing annotation. In
                # that case we need to return a Ref to the subquery's annotation.
                if name not in self.annotation_select:
                    raise FieldError(
                        "Cannot aggregate over the '%s' alias. Use annotate() "
                        "to promote it." % name
                    )
                return Ref(name, self.annotation_select[name])
            else:
                return annotation
        else:
            field_list = name.split(LOOKUP_SEP)
            annotation = self.annotations.get(field_list[0])
            if annotation is not None:
                # An annotation followed by transforms, e.g. "total__abs".
                for transform in field_list[1:]:
                    annotation = self.try_transform(annotation, transform)
                return annotation
            join_info = self.setup_joins(
                field_list, self.get_meta(), self.get_initial_alias(), can_reuse=reuse
            )
            targets, final_alias, join_list = self.trim_joins(
                join_info.targets, join_info.joins, join_info.path
            )
            if not allow_joins and len(join_list) > 1:
                raise FieldError(
                    "Joined field references are not permitted in this query"
                )
            if len(targets) > 1:
                raise FieldError(
                    "Referencing multicolumn fields with F() objects isn't supported"
                )
            # Verify that the last lookup in name is a field or a transform:
            # transform_function() raises FieldError if not.
            transform = join_info.transform_function(targets[0], final_alias)
            if reuse is not None:
                reuse.update(join_list)
            return transform
2054
    def split_exclude(self, filter_expr, can_reuse, names_with_path):
        """
        When doing an exclude against any kind of N-to-many relation, we need
        to use a subquery. This method constructs the nested query, given the
        original exclude filter (filter_expr) and the portion up to the first
        N-to-many relation field.

        For example, if the origin filter is ~Q(child__name='foo'), filter_expr
        is ('child__name', 'foo') and can_reuse is a set of joins usable for
        filters in the original query.

        We will turn this into equivalent of:
            WHERE NOT EXISTS(
                SELECT 1
                FROM child
                WHERE name = 'foo' AND child.parent_id = parent.id
                LIMIT 1
            )
        """
        # Generate the inner query.
        query = self.__class__(self.model)
        query._filtered_relations = self._filtered_relations
        filter_lhs, filter_rhs = filter_expr
        if isinstance(filter_rhs, OuterRef):
            # Re-wrap so the reference resolves one query level further out.
            filter_rhs = OuterRef(filter_rhs)
        elif isinstance(filter_rhs, F):
            # An F() reference in the outer query becomes an OuterRef inside
            # the subquery.
            filter_rhs = OuterRef(filter_rhs.name)
        query.add_filter(filter_lhs, filter_rhs)
        query.clear_ordering(force=True)
        # Try to have as simple as possible subquery -> trim leading joins from
        # the subquery.
        trimmed_prefix, contains_louter = query.trim_start(names_with_path)

        col = query.select[0]
        select_field = col.target
        alias = col.alias
        if alias in can_reuse:
            pk = select_field.model._meta.pk
            # Need to add a restriction so that outer query's filters are in effect for
            # the subquery, too.
            query.bump_prefix(self)
            lookup_class = select_field.get_lookup("exact")
            # Note that the query.select[0].alias is different from alias
            # due to bump_prefix above.
            lookup = lookup_class(pk.get_col(query.select[0].alias), pk.get_col(alias))
            query.where.add(lookup, AND)
            query.external_aliases[alias] = True
        else:
            # Correlate the subquery's select column with the trimmed prefix
            # in the outer query.
            lookup_class = select_field.get_lookup("exact")
            lookup = lookup_class(col, ResolvedOuterRef(trimmed_prefix))
            query.where.add(lookup, AND)

        condition, needed_inner = self.build_filter(Exists(query))

        if contains_louter:
            or_null_condition, _ = self.build_filter(
                ("%s__isnull" % trimmed_prefix, True),
                current_negated=True,
                branch_negated=True,
                can_reuse=can_reuse,
            )
            condition.add(or_null_condition, OR)
            # Note that the end result will be:
            # NOT EXISTS (inner_q) OR outercol IS NULL
            # this might look crazy but due to how NULL works, this seems to be
            # correct. If the IS NULL check is removed, then if outercol
            # IS NULL we will not match the row.
        return condition, needed_inner
2123
2124 def set_empty(self):
2125 self.where.add(NothingNode(), AND)
2126 for query in self.combined_queries:
2127 query.set_empty()
2128
2129 def is_empty(self):
2130 return any(isinstance(c, NothingNode) for c in self.where.children)
2131
2132 def set_limits(self, low=None, high=None):
2133 """
2134 Adjust the limits on the rows retrieved. Use low/high to set these,
2135 as it makes it more Pythonic to read and write. When the SQL query is
2136 created, convert them to the appropriate offset and limit values.
2137
2138 Apply any limits passed in here to the existing constraints. Add low
2139 to the current low value and clamp both to any existing high value.
2140 """
2141 if high is not None:
2142 if self.high_mark is not None:
2143 self.high_mark = min(self.high_mark, self.low_mark + high)
2144 else:
2145 self.high_mark = self.low_mark + high
2146 if low is not None:
2147 if self.high_mark is not None:
2148 self.low_mark = min(self.high_mark, self.low_mark + low)
2149 else:
2150 self.low_mark = self.low_mark + low
2151
2152 if self.low_mark == self.high_mark:
2153 self.set_empty()
2154
2155 def clear_limits(self):
2156 """Clear any existing limits."""
2157 self.low_mark, self.high_mark = 0, None
2158
2159 @property
2160 def is_sliced(self):
2161 return self.low_mark != 0 or self.high_mark is not None
2162
2163 def has_limit_one(self):
2164 return self.high_mark is not None and (self.high_mark - self.low_mark) == 1
2165
2166 def can_filter(self):
2167 """
2168 Return True if adding filters to this instance is still possible.
2169
2170 Typically, this means no limits or offsets have been put on the results.
2171 """
2172 return not self.is_sliced
2173
    def clear_select_clause(self):
        """Remove all fields from SELECT clause."""
        self.select = ()
        self.default_cols = False
        self.select_related = False
        self.set_extra_mask(())
        # Clear the annotation mask before dropping self.selected below: the
        # mask setter prunes/updates self.selected when it is non-None.
        self.set_annotation_mask(())
        self.selected = None
2182
2183 def clear_select_fields(self):
2184 """
2185 Clear the list of fields to select (but not extra_select columns).
2186 Some queryset types completely replace any existing list of select
2187 columns.
2188 """
2189 self.select = ()
2190 self.values_select = ()
2191 self.selected = None
2192
2193 def add_select_col(self, col, name):
2194 self.select += (col,)
2195 self.values_select += (name,)
2196 self.selected[name] = len(self.select) - 1
2197
2198 def set_select(self, cols):
2199 self.default_cols = False
2200 self.select = tuple(cols)
2201
2202 def add_distinct_fields(self, *field_names):
2203 """
2204 Add and resolve the given fields to the query's "distinct on" clause.
2205 """
2206 self.distinct_fields = field_names
2207 self.distinct = True
2208
    def add_fields(self, field_names, allow_m2m=True):
        """
        Add the given (model) fields to the select set. Add the field names in
        the order specified.
        """
        alias = self.get_initial_alias()
        opts = self.get_meta()

        try:
            cols = []
            for name in field_names:
                # Join promotion note - we must not remove any rows here, so
                # if there is no existing joins, use outer join.
                join_info = self.setup_joins(
                    name.split(LOOKUP_SEP), opts, alias, allow_many=allow_m2m
                )
                targets, final_alias, joins = self.trim_joins(
                    join_info.targets,
                    join_info.joins,
                    join_info.path,
                )
                for target in targets:
                    cols.append(join_info.transform_function(target, final_alias))
            if cols:
                self.set_select(cols)
        except MultiJoin:
            # `name` is the loop variable that triggered the failure.
            raise FieldError("Invalid field name: '%s'" % name)
        except FieldError:
            if LOOKUP_SEP in name:
                # For lookups spanning over relationships, show the error
                # from the model on which the lookup failed.
                raise
            else:
                names = sorted(
                    [
                        *get_field_names_from_opts(opts),
                        *self.extra,
                        *self.annotation_select,
                        *self._filtered_relations,
                    ]
                )
                raise FieldError(
                    "Cannot resolve keyword %r into field. "
                    "Choices are: %s" % (name, ", ".join(names))
                )
2254
    def add_ordering(self, *ordering):
        """
        Add items from the 'ordering' sequence to the query's "order by"
        clause. These items are either field names (not column names) --
        possibly with a direction prefix ('-' or '?') -- or OrderBy
        expressions.

        If 'ordering' is empty, clear all ordering from the query.
        """
        errors = []
        for item in ordering:
            if isinstance(item, str):
                if item == "?":
                    # Random ordering needs no validation.
                    continue
                item = item.removeprefix("-")
                if item in self.annotations:
                    continue
                if self.extra and item in self.extra:
                    continue
                # names_to_path() validates the lookup. A descriptive
                # FieldError will be raised if it's not.
                self.names_to_path(item.split(LOOKUP_SEP), self.model._meta)
            elif not hasattr(item, "resolve_expression"):
                errors.append(item)
            if getattr(item, "contains_aggregate", False):
                raise FieldError(
                    "Using an aggregate in order_by() without also including "
                    "it in annotate() is not allowed: %s" % item
                )
        if errors:
            raise FieldError("Invalid order_by arguments: %s" % errors)
        if ordering:
            self.order_by += ordering
        else:
            # order_by() with no arguments disables the default ordering.
            self.default_ordering = False
2290
2291 def clear_ordering(self, force=False, clear_default=True):
2292 """
2293 Remove any ordering settings if the current query allows it without
2294 side effects, set 'force' to True to clear the ordering regardless.
2295 If 'clear_default' is True, there will be no ordering in the resulting
2296 query (not even the model's default).
2297 """
2298 if not force and (
2299 self.is_sliced or self.distinct_fields or self.select_for_update
2300 ):
2301 return
2302 self.order_by = ()
2303 self.extra_order_by = ()
2304 if clear_default:
2305 self.default_ordering = False
2306
    def set_group_by(self, allow_aliases=True):
        """
        Expand the GROUP BY clause required by the query.

        This will usually be the set of all non-aggregate fields in the
        return data. If the database backend supports grouping by the
        primary key, and the query would be equivalent, the optimization
        will be made automatically.
        """
        if allow_aliases and self.values_select:
            # If grouping by aliases is allowed assign selected value aliases
            # by moving them to annotations.
            group_by_annotations = {}
            values_select = {}
            for alias, expr in zip(self.values_select, self.select):
                if isinstance(expr, Col):
                    values_select[alias] = expr
                else:
                    group_by_annotations[alias] = expr
            self.annotations = {**group_by_annotations, **self.annotations}
            self.append_annotation_mask(group_by_annotations)
            self.select = tuple(values_select.values())
            self.values_select = tuple(values_select)
        group_by = list(self.select)
        for alias, annotation in self.annotation_select.items():
            # Annotations with no group-by columns contribute nothing.
            if not (group_by_cols := annotation.get_group_by_cols()):
                continue
            if allow_aliases and not annotation.contains_aggregate:
                # Group by the annotation's alias instead of repeating its
                # whole expression.
                group_by.append(Ref(alias, annotation))
            else:
                group_by.extend(group_by_cols)
        self.group_by = tuple(group_by)
2339
2340 def add_select_related(self, fields):
2341 """
2342 Set up the select_related data structure so that we only select
2343 certain related models (as opposed to all models, when
2344 self.select_related=True).
2345 """
2346 if isinstance(self.select_related, bool):
2347 field_dict = {}
2348 else:
2349 field_dict = self.select_related
2350 for field in fields:
2351 d = field_dict
2352 for part in field.split(LOOKUP_SEP):
2353 d = d.setdefault(part, {})
2354 self.select_related = field_dict
2355
2356 def add_extra(self, select, select_params, where, params, tables, order_by):
2357 """
2358 Add data to the various extra_* attributes for user-created additions
2359 to the query.
2360 """
2361 if select:
2362 # We need to pair any placeholder markers in the 'select'
2363 # dictionary with their parameters in 'select_params' so that
2364 # subsequent updates to the select dictionary also adjust the
2365 # parameters appropriately.
2366 select_pairs = {}
2367 if select_params:
2368 param_iter = iter(select_params)
2369 else:
2370 param_iter = iter([])
2371 for name, entry in select.items():
2372 self.check_alias(name)
2373 entry = str(entry)
2374 entry_params = []
2375 pos = entry.find("%s")
2376 while pos != -1:
2377 if pos == 0 or entry[pos - 1] != "%":
2378 entry_params.append(next(param_iter))
2379 pos = entry.find("%s", pos + 2)
2380 select_pairs[name] = (entry, entry_params)
2381 self.extra.update(select_pairs)
2382 if where or params:
2383 self.where.add(ExtraWhere(where, params), AND)
2384 if tables:
2385 self.extra_tables += tuple(tables)
2386 if order_by:
2387 self.extra_order_by = order_by
2388
2389 def clear_deferred_loading(self):
2390 """Remove any fields from the deferred loading set."""
2391 self.deferred_loading = (frozenset(), True)
2392
2393 def add_deferred_loading(self, field_names):
2394 """
2395 Add the given list of model field names to the set of fields to
2396 exclude from loading from the database when automatic column selection
2397 is done. Add the new field names to any existing field names that
2398 are deferred (or removed from any existing field names that are marked
2399 as the only ones for immediate loading).
2400 """
2401 # Fields on related models are stored in the literal double-underscore
2402 # format, so that we can use a set datastructure. We do the foo__bar
2403 # splitting and handling when computing the SQL column names (as part of
2404 # get_columns()).
2405 existing, defer = self.deferred_loading
2406 if defer:
2407 # Add to existing deferred names.
2408 self.deferred_loading = existing.union(field_names), True
2409 else:
2410 # Remove names from the set of any existing "immediate load" names.
2411 if new_existing := existing.difference(field_names):
2412 self.deferred_loading = new_existing, False
2413 else:
2414 self.clear_deferred_loading()
2415 if new_only := set(field_names).difference(existing):
2416 self.deferred_loading = new_only, True
2417
2418 def add_immediate_loading(self, field_names):
2419 """
2420 Add the given list of model field names to the set of fields to
2421 retrieve when the SQL is executed ("immediate loading" fields). The
2422 field names replace any existing immediate loading field names. If
2423 there are field names already specified for deferred loading, remove
2424 those names from the new field_names before storing the new names
2425 for immediate loading. (That is, immediate loading overrides any
2426 existing immediate values, but respects existing deferrals.)
2427 """
2428 existing, defer = self.deferred_loading
2429 field_names = set(field_names)
2430 if "pk" in field_names:
2431 field_names.remove("pk")
2432 field_names.add(self.get_meta().pk.name)
2433
2434 if defer:
2435 # Remove any existing deferred names from the current set before
2436 # setting the new names.
2437 self.deferred_loading = field_names.difference(existing), False
2438 else:
2439 # Replace any existing "immediate load" field names.
2440 self.deferred_loading = frozenset(field_names), False
2441
2442 def set_annotation_mask(self, names):
2443 """Set the mask of annotations that will be returned by the SELECT."""
2444 if names is None:
2445 self.annotation_select_mask = None
2446 else:
2447 self.annotation_select_mask = set(names)
2448 if self.selected:
2449 # Prune the masked annotations.
2450 self.selected = {
2451 key: value
2452 for key, value in self.selected.items()
2453 if not isinstance(value, str)
2454 or value in self.annotation_select_mask
2455 }
2456 # Append the unmasked annotations.
2457 for name in names:
2458 self.selected[name] = name
2459 self._annotation_select_cache = None
2460
2461 def append_annotation_mask(self, names):
2462 if self.annotation_select_mask is not None:
2463 self.set_annotation_mask(self.annotation_select_mask.union(names))
2464
2465 def set_extra_mask(self, names):
2466 """
2467 Set the mask of extra select items that will be returned by SELECT.
2468 Don't remove them from the Query since they might be used later.
2469 """
2470 if names is None:
2471 self.extra_select_mask = None
2472 else:
2473 self.extra_select_mask = set(names)
2474 self._extra_select_cache = None
2475
2476 @property
2477 def has_select_fields(self):
2478 return self.selected is not None
2479
    def set_values(self, fields):
        """
        Configure the query for a values()/values_list() style selection of
        `fields`. Each name is resolved to an extra select entry, a selected
        annotation, or a model field, and the extra/annotation masks are
        updated to match; the GROUP BY clause is rebuilt if needed. An empty
        `fields` selects all concrete model fields.
        """
        self.select_related = False
        self.clear_deferred_loading()
        self.clear_select_fields()

        selected = {}
        if fields:
            for field in fields:
                self.check_alias(field)
            field_names = []
            extra_names = []
            annotation_names = []
            if not self.extra and not self.annotations:
                # Shortcut - if there are no extra or annotations, then
                # the values() clause must be just field names.
                field_names = list(fields)
                selected = dict(zip(fields, range(len(fields))))
            else:
                self.default_cols = False
                for f in fields:
                    if extra := self.extra_select.get(f):
                        extra_names.append(f)
                        selected[f] = RawSQL(*extra)
                    elif f in self.annotation_select:
                        annotation_names.append(f)
                        selected[f] = f
                    elif f in self.annotations:
                        # An annotation exists but is masked out of the SELECT.
                        raise FieldError(
                            f"Cannot select the '{f}' alias. Use annotate() to "
                            "promote it."
                        )
                    else:
                        # Call `names_to_path` to ensure a FieldError including
                        # annotations about to be masked as valid choices if
                        # `f` is not resolvable.
                        if self.annotation_select:
                            self.names_to_path(f.split(LOOKUP_SEP), self.model._meta)
                        selected[f] = len(field_names)
                        field_names.append(f)
            self.set_extra_mask(extra_names)
            self.set_annotation_mask(annotation_names)
        else:
            field_names = [f.attname for f in self.model._meta.concrete_fields]
            selected = dict.fromkeys(field_names, None)
        # Selected annotations must be known before setting the GROUP BY
        # clause.
        if self.group_by is True:
            self.add_fields(
                (f.attname for f in self.model._meta.concrete_fields), False
            )
            # Disable GROUP BY aliases to avoid orphaning references to the
            # SELECT clause which is about to be cleared.
            self.set_group_by(allow_aliases=False)
            self.clear_select_fields()
        elif self.group_by:
            # Resolve GROUP BY annotation references if they are not part of
            # the selected fields anymore.
            group_by = []
            for expr in self.group_by:
                if isinstance(expr, Ref) and expr.refs not in selected:
                    expr = self.annotations[expr.refs]
                group_by.append(expr)
            self.group_by = tuple(group_by)

        self.values_select = tuple(field_names)
        self.add_fields(field_names, True)
        self.selected = selected if fields else None
2547
2548 @property
2549 def annotation_select(self):
2550 """
2551 Return the dictionary of aggregate columns that are not masked and
2552 should be used in the SELECT clause. Cache this result for performance.
2553 """
2554 if self._annotation_select_cache is not None:
2555 return self._annotation_select_cache
2556 elif not self.annotations:
2557 return {}
2558 elif self.annotation_select_mask is not None:
2559 self._annotation_select_cache = {
2560 k: v
2561 for k, v in self.annotations.items()
2562 if k in self.annotation_select_mask
2563 }
2564 return self._annotation_select_cache
2565 else:
2566 return self.annotations
2567
2568 @property
2569 def extra_select(self):
2570 if self._extra_select_cache is not None:
2571 return self._extra_select_cache
2572 if not self.extra:
2573 return {}
2574 elif self.extra_select_mask is not None:
2575 self._extra_select_cache = {
2576 k: v for k, v in self.extra.items() if k in self.extra_select_mask
2577 }
2578 return self._extra_select_cache
2579 else:
2580 return self.extra
2581
    def trim_start(self, names_with_path):
        """
        Trim joins from the start of the join path. The candidates for trim
        are the PathInfos in names_with_path structure that are m2m joins.

        Also set the select column so the start matches the join.

        This method is meant to be used for generating the subquery joins &
        cols in split_exclude().

        Return a lookup usable for doing outerq.filter(lookup=self) and a
        boolean indicating if the joins in the prefix contain a LEFT OUTER join.
        """
        all_paths = []
        for _, paths in names_with_path:
            all_paths.extend(paths)
        contains_louter = False
        # Trim and operate only on tables that were generated for
        # the lookup part of the query. That is, avoid trimming
        # joins generated for F() expressions.
        lookup_tables = [
            t for t in self.alias_map if t in self._lookup_joins or t == self.base_table
        ]
        # Unref leading non-m2m joins; stop at the first m2m join. After the
        # loop, `trimmed_paths`/`path` refer to that first m2m PathInfo.
        for trimmed_paths, path in enumerate(all_paths):
            if path.m2m:
                break
            if self.alias_map[lookup_tables[trimmed_paths + 1]].join_type == LOUTER:
                contains_louter = True
            alias = lookup_tables[trimmed_paths]
            self.unref_alias(alias)
        # The path.join_field is a Rel, lets get the other side's field
        join_field = path.join_field.field
        # Build the filter prefix.
        paths_in_prefix = trimmed_paths
        trimmed_prefix = []
        for name, path in names_with_path:
            if paths_in_prefix - len(path) < 0:
                break
            trimmed_prefix.append(name)
            paths_in_prefix -= len(path)
        trimmed_prefix.append(join_field.foreign_related_fields[0].name)
        trimmed_prefix = LOOKUP_SEP.join(trimmed_prefix)
        # Lets still see if we can trim the first join from the inner query
        # (that is, self). We can't do this for:
        # - LEFT JOINs because we would miss those rows that have nothing on
        # the outer side,
        # - INNER JOINs from filtered relations because we would miss their
        # filters.
        first_join = self.alias_map[lookup_tables[trimmed_paths + 1]]
        if first_join.join_type != LOUTER and not first_join.filtered_relation:
            select_fields = [r[0] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths + 1]
            self.unref_alias(lookup_tables[trimmed_paths])
            extra_restriction = join_field.get_extra_restriction(
                None, lookup_tables[trimmed_paths + 1]
            )
            if extra_restriction:
                self.where.add(extra_restriction, AND)
        else:
            # TODO: It might be possible to trim more joins from the start of the
            # inner query if it happens to have a longer join chain containing the
            # values in select_fields. Lets punt this one for now.
            select_fields = [r[1] for r in join_field.related_fields]
            select_alias = lookup_tables[trimmed_paths]
        # The found starting point is likely a join_class instead of a
        # base_table_class reference. But the first entry in the query's FROM
        # clause must not be a JOIN.
        for table in self.alias_map:
            if self.alias_refcount[table] > 0:
                self.alias_map[table] = self.base_table_class(
                    self.alias_map[table].table_name,
                    table,
                )
                break
        self.set_select([f.get_col(select_alias) for f in select_fields])
        return trimmed_prefix, contains_louter
2658
2659 def is_nullable(self, field):
2660 """
2661 Check if the given field should be treated as nullable.
2662
2663 Some backends treat '' as null and Django treats such fields as
2664 nullable for those backends. In such situations field.null can be
2665 False even if we should treat the field as nullable.
2666 """
2667 # We need to use DEFAULT_DB_ALIAS here, as QuerySet does not have
2668 # (nor should it have) knowledge of which connection is going to be
2669 # used. The proper fix would be to defer all decisions where
2670 # is_nullable() is needed to the compiler stage, but that is not easy
2671 # to do currently.
2672 return field.null or (
2673 field.empty_strings_allowed
2674 and connections[DEFAULT_DB_ALIAS].features.interprets_empty_strings_as_nulls
2675 )
2676
2677
def get_order_dir(field, default="ASC"):
    """
    Split an ordering specification into (field name, direction). For
    example, '-foo' is returned as ('foo', 'DESC').

    Names without a prefix (or with a '+' prefix) sort in the 'default'
    direction; a '-' prefix always sorts the opposite way.
    """
    same_dir, opposite_dir = ORDER_DIR[default]
    if field[0] == "-":
        return field[1:], opposite_dir
    return field, same_dir
2690
2691
class JoinPromoter:
    """
    Abstract away join promotion problems for complex filter conditions:
    decide which table aliases must be promoted to LOUTER and which can be
    demoted to INNER for a multi-child condition.
    """

    def __init__(self, connector, num_children, negated):
        self.connector = connector
        self.negated = negated
        if negated:
            # A negated condition promotes joins like its De Morgan dual:
            # NOT (a AND b) behaves like (a OR b) and vice versa.
            self.effective_connector = OR if connector == AND else AND
        else:
            self.effective_connector = connector
        self.num_children = num_children
        # Per table alias, the number of child conditions that saw the join
        # as required for inner and/or outer use.
        self.votes = Counter()

    def __repr__(self):
        return "%s(connector=%r, num_children=%r, negated=%r)" % (
            type(self).__qualname__,
            self.connector,
            self.num_children,
            self.negated,
        )

    def add_votes(self, votes):
        """Register a single vote for every item in the given iterable."""
        self.votes.update(votes)

    def update_join_types(self, query):
        """
        Adjust join types on query so the generated SQL is as efficient as
        possible while staying correct: use INNER wherever safe, but keep
        OUTER joins whose demotion could remove valid results.
        """
        promote = set()
        demote = set()
        # effective_connector (not connector) is compared so that
        # NOT (a AND b) is handled like (a OR b) — see __init__.
        is_or = self.effective_connector == OR
        is_and = self.effective_connector == AND
        for alias, num_votes in self.votes.items():
            unanimous = num_votes == self.num_children
            # In the OR case, a join required by only some children must be
            # LOUTER. With rel_a__col=1 | rel_b__col=2, an INNER join to
            # rel_a would drop rows whose only match comes through rel_b
            # (e.g. a reverse FK with no rel_a rows, or a NULL direct FK),
            # so any existing INNER is promoted here (possibly to be demoted
            # again below).
            if is_or and not unanimous:
                promote.add(alias)
            # An AND child that filters on the join can only match when a
            # joined row exists (NULL=anything is false), so INNER is safe.
            # The same holds for OR when *every* child voted for the join:
            # e.g. rel_a__col__icontains=Alex | rel_a__col__icontains=Russell
            # cannot match if rel_a yields no rows.
            if is_and or (is_or and unanimous):
                demote.add(alias)
            # Both can fire across clauses, e.g.
            # (rel_a__col=1 | rel_b__col=2) & rel_a__col__gte=0: the OR
            # clause promotes rel_a and rel_b, then the AND clause demotes
            # rel_a back to INNER. That is correct — if rel_a produces no
            # rows, rel_a__col__gte=0 can't be true, so the whole condition
            # is false anyway. The same reasoning holds with the clauses
            # swapped, or with the __gte clause replaced by an OR of
            # rel_a-only conditions.
        query.promote_joins(promote)
        query.demote_joins(demote)
        return demote