From ecaba3602837d1e02fe1e961f7d3bf9086453259 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Anssi=20K=C3=A4=C3=A4ri=C3=A4inen?= <akaariai@gmail.com>
Date: Tue, 24 Sep 2013 19:37:55 +0300
Subject: [PATCH] Improved Query join promotion logic

There were multiple cases where join promotion was a bit too aggressive.
This resulted in using outer joins where not necessary.

Refs #21150.
---
 django/db/models/sql/query.py       | 116 ++++++++++++++++++++--------
 tests/aggregation_regress/models.py |   2 +-
 tests/aggregation_regress/tests.py  |  23 +++++-
 tests/queries/tests.py              |  47 +++++++----
 4 files changed, 136 insertions(+), 52 deletions(-)

diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
index 9eea55e98e..ffd5af6c15 100644
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@@ -901,15 +901,15 @@ class Query(object):
             # Not all tables need to be joined to anything. No join type
             # means the later columns are ignored.
             join_type = None
-        elif outer_if_first or self.alias_map[lhs].join_type == self.LOUTER:
-            # We need to use LOUTER join if asked by outer_if_first or if the
-            # LHS table is left-joined in the query.
+        elif self.alias_map[lhs].join_type == self.LOUTER:
             join_type = self.LOUTER
         else:
             join_type = self.INNER
         join = JoinInfo(table, alias, join_type, lhs, join_cols or ((None, None),), nullable,
                         join_field)
         self.alias_map[alias] = join
+        if outer_if_first:
+            self.promote_joins([alias])
         if connection in self.join_map:
             self.join_map[connection] += (alias,)
         else:
@@ -1004,17 +1004,14 @@ class Query(object):
             #   - this is an annotation over a model field
             # then we need to explore the joins that are required.
 
+            # Join promotion note - we must not remove any rows here, so use
+            # outer join if there isn't any existing join.
             field, sources, opts, join_list, path = self.setup_joins(
-                field_list, opts, self.get_initial_alias())
+                field_list, opts, self.get_initial_alias(), outer_if_first=True)
 
             # Process the join chain to see if it can be trimmed
             targets, _, join_list = self.trim_joins(sources, join_list, path)
 
-            # If the aggregate references a model or field that requires a join,
-            # those joins must be LEFT OUTER - empty join rows must be returned
-            # in order for zeros to be returned for those aggregates.
-            self.promote_joins(join_list)
-
             col = targets[0].column
             source = sources[0]
             col = (join_list[-1], col)
@@ -1089,8 +1086,69 @@ class Query(object):
                         break
         return lookup_type, lookup_parts
 
+    def promote_filter_joins(self, join_list, can_reuse, lookup_type, value,
+                             current_negated, connector):
+        # If the comparison is against NULL, we may need to use some left
+        # outer joins when creating the join chain.
+        #
+        # The logic here is that every isnull lookup in non-negated case is
+        # promoted when the connector is OR. In the AND case we do this only
+        # for first creation of the join. Join demotion happens reverse to
+        # this - demote always in AND case, first use only in OR case.
+        #
+        # In the OR case, a null row for the join can yield results for isnull
+        # lookup. But in the AND case that can't happen (assuming the other
+        # joins require non-null values) - if the join produces null row, then
+        # the ANDed condition that requires non-null value will not match, and
+        # hence the whole condition will not match.
+        #
+        # Consider case: (a__something & a__isnull=True)
+        #
+        # If a is null here, then a__something can't match anything (unless
+        # it also requires outer join), and thus the join doesn't need to be
+        # promoted by a__isnull.
+        #
+        # The connector isn't the only condition for removing join promotion.
+        # The already created joins also play a role here. Above, in the
+        # AND case, we don't want to promote the isnull lookup. But if we have
+        # only (a__isnull), then we must promote it. To see if a join needs
+        # to be promoted we use the seen joins inside this filter clause. That
+        # is contained in can_reuse - those are actually joins that have been
+        # built by this filter clause.
+        #
+        # Similar reasoning applies for join demotion, exception we demote
+        # joins in the AND case always, and never demote them in the OR case.
+        #
+        # Some examples: (a__name__isnull=True | a__type=1)
+        # When a__name__isnull is seen it is promoted (it is first creation of
+        # that join). a__type will not demote the join as it isn't first
+        # "a" join in the filter condition, and this is ORed query.
+        #                (a__name__isnull=True & a__type=1)
+        # Here again a__name__isnull will create an outer join, but now a__type
+        # will demote the join back to inner join as the connector is AND.
+        #                (a__type=1 & a__name__isnull=True)
+        # a__type will create inner join, a__name__isnull will not promote it
+        # to outer join as this is AND case and this isn't first use of the
+        # join. For completeness:
+        #                (a__type=1 | a__name__isnull=True)
+        # The join for a__type is created as inner join. The join is promoted
+        # by a__name__isnull (ORed query => always promote isnull=True joins)
+
+        if lookup_type == 'isnull' and value is True and not current_negated:
+            promotable_joins = join_list if connector == OR else ()
+            if connector == AND and can_reuse is not None:
+                promotable_joins = (j for j in join_list if j not in can_reuse)
+            self.promote_joins(promotable_joins)
+        else:
+            demotable_joins = () if connector == OR else set(join_list)
+            if connector == OR and can_reuse is not None:
+                demotable_joins = set(j for j in join_list if j not in can_reuse)
+            for j in join_list:
+                if self.alias_map[j].join_type == self.LOUTER and j in demotable_joins:
+                    self.alias_map[j] = self.alias_map[j]._replace(join_type=self.INNER)
+
     def build_filter(self, filter_expr, branch_negated=False, current_negated=False,
-                     can_reuse=None):
+                     can_reuse=None, connector=AND):
         """
         Builds a WhereNode for a single filter clause, but doesn't add it
         to this Query. Query.add_q() will then add this filter to the where
@@ -1139,18 +1197,14 @@ class Query(object):
         try:
             field, sources, opts, join_list, path = self.setup_joins(
                 parts, opts, alias, can_reuse, allow_many,)
-            if can_reuse is not None:
-                can_reuse.update(join_list)
         except MultiJoin as e:
             return self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
                                       can_reuse, e.names_with_path)
 
-        if (lookup_type == 'isnull' and value is True and not current_negated and
-                len(join_list) > 1):
-            # If the comparison is against NULL, we may need to use some left
-            # outer joins when creating the join chain. This is only done when
-            # needed, as it's less efficient at the database level.
-            self.promote_joins(join_list)
+        self.promote_filter_joins(join_list, can_reuse, lookup_type, value,
+                                  current_negated, connector)
+        if can_reuse is not None:
+            can_reuse.update(join_list)
 
         # Process the join list to see if we can remove any inner joins from
         # the far end (fewer tables in a query is better). Note that join
@@ -1182,7 +1236,7 @@ class Query(object):
         return clause
 
     def add_filter(self, filter_clause):
-        self.where.add(self.build_filter(filter_clause), 'AND')
+        self.where.add(self.build_filter(filter_clause, can_reuse=self.used_aliases), 'AND')
 
     def need_having(self, obj):
         """
@@ -1237,14 +1291,11 @@ class Query(object):
         else:
             where_part, having_parts = self.split_having_parts(
                 q_object.clone(), q_object.negated)
-        used_aliases = self.used_aliases
-        clause = self._add_q(where_part, used_aliases)
+        clause = self._add_q(where_part, self.used_aliases)
         self.where.add(clause, AND)
         for hp in having_parts:
-            clause = self._add_q(hp, used_aliases)
+            clause = self._add_q(hp, self.used_aliases)
             self.having.add(clause, AND)
-        if self.filter_is_sticky:
-            self.used_aliases = used_aliases
 
     def _add_q(self, q_object, used_aliases, branch_negated=False,
                current_negated=False):
@@ -1272,7 +1323,7 @@ class Query(object):
             else:
                 child_clause = self.build_filter(
                     child, can_reuse=used_aliases, branch_negated=branch_negated,
-                    current_negated=current_negated)
+                    current_negated=current_negated, connector=connector)
             target_clause.add(child_clause, connector)
             if connector == OR:
                 used = alias_diff(refcounts_before, self.alias_refcount)
@@ -1445,7 +1496,7 @@ class Query(object):
         """
         # Generate the inner query.
         query = Query(self.model)
-        query.where.add(query.build_filter(filter_expr), AND)
+        query.add_filter(filter_expr)
         query.clear_ordering(True)
         # Try to have as simple as possible subquery -> trim leading joins from
         # the subquery.
@@ -1553,15 +1604,12 @@ class Query(object):
 
         try:
             for name in field_names:
+                # Join promotion note - we must not remove any rows here, so
+                # if there is no existing joins, use outer join.
                 field, targets, u2, joins, path = self.setup_joins(
-                        name.split(LOOKUP_SEP), opts, alias, None, allow_m2m,
-                        True)
-
-                # Trim last join if possible
-                targets, final_alias, remaining_joins = self.trim_joins(targets, joins[-2:], path)
-                joins = joins[:-2] + remaining_joins
-
-                self.promote_joins(joins[1:])
+                    name.split(LOOKUP_SEP), opts, alias, can_reuse=None,
+                    allow_many=allow_m2m, outer_if_first=True)
+                targets, final_alias, joins = self.trim_joins(targets, joins, path)
                 for target in targets:
                     self.select.append(SelectInfo((final_alias, target.column), target))
         except MultiJoin:
diff --git a/tests/aggregation_regress/models.py b/tests/aggregation_regress/models.py
index 24859f4ce4..a2dc060640 100644
--- a/tests/aggregation_regress/models.py
+++ b/tests/aggregation_regress/models.py
@@ -90,7 +90,7 @@ class HardbackBook(Book):
 
 # Models for ticket #21150
 class Alfa(models.Model):
-    pass
+    name = models.CharField(max_length=10, null=True)
 
 class Bravo(models.Model):
     pass
diff --git a/tests/aggregation_regress/tests.py b/tests/aggregation_regress/tests.py
index 82be5afab2..9fcaefe6f5 100644
--- a/tests/aggregation_regress/tests.py
+++ b/tests/aggregation_regress/tests.py
@@ -12,9 +12,8 @@ from django.test import TestCase, skipUnlessDBFeature
 from django.test.utils import Approximate
 from django.utils import six
 
-from .models import (
-    Author, Book, Publisher, Clues, Entries, HardbackBook, ItemTag,
-    WithManualPK, Alfa, Bravo, Charlie)
+from .models import (Author, Book, Publisher, Clues, Entries, HardbackBook,
+        ItemTag, WithManualPK, Alfa, Bravo, Charlie)
 
 
 class AggregationTests(TestCase):
@@ -1137,6 +1136,8 @@ class AggregationTests(TestCase):
             'select__avg': Approximate(1.666, places=2),
         })
 
+
+class JoinPromotionTests(TestCase):
     def test_ticket_21150(self):
         b = Bravo.objects.create()
         c = Charlie.objects.create(bravo=b)
@@ -1152,3 +1153,19 @@ class AggregationTests(TestCase):
         self.assertQuerysetEqual(
             qs, [c], lambda x: x)
         self.assertEqual(qs[0].alfa, a)
+
+    def test_existing_join_not_promoted(self):
+        # No promotion for existing joins
+        qs = Charlie.objects.filter(alfa__name__isnull=False).annotate(Count('alfa__name'))
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
+        # Also, the existing join is unpromoted when doing filtering for already
+        # promoted join.
+        qs = Charlie.objects.annotate(Count('alfa__name')).filter(alfa__name__isnull=False)
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
+        # But, as the join is nullable first use by annotate will be LOUTER
+        qs = Charlie.objects.annotate(Count('alfa__name'))
+        self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query))
+
+    def test_non_nullable_fk_not_promoted(self):
+        qs = Book.objects.annotate(Count('contact__name'))
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
diff --git a/tests/queries/tests.py b/tests/queries/tests.py
index 7bb7a150a2..a7b0af55a3 100644
--- a/tests/queries/tests.py
+++ b/tests/queries/tests.py
@@ -2689,6 +2689,15 @@ class NullJoinPromotionOrTest(TestCase):
         self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
         self.assertEqual(list(qs), [self.a2])
 
+    def test_null_join_demotion(self):
+        qs = ModelA.objects.filter(Q(b__name__isnull=False) & Q(b__name__isnull=True))
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
+        qs = ModelA.objects.filter(Q(b__name__isnull=True) & Q(b__name__isnull=False))
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
+        qs = ModelA.objects.filter(Q(b__name__isnull=False) | Q(b__name__isnull=True))
+        self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query))
+        qs = ModelA.objects.filter(Q(b__name__isnull=True) | Q(b__name__isnull=False))
+        self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query))
 
 class ReverseJoinTrimmingTest(TestCase):
     def test_reverse_trimming(self):
@@ -2785,22 +2794,19 @@ class DisjunctionPromotionTests(TestCase):
         self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
         self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
 
-    @unittest.expectedFailure
-    def test_disjunction_promotion3_failing(self):
-        # Now the ORed filter creates LOUTER join, but we do not have
-        # logic to unpromote it for the AND filter after it. The query
-        # results will be correct, but we have one LOUTER JOIN too much
-        # currently.
+    def test_disjunction_promotion3_demote(self):
+        # This one needs demotion logic: the first filter causes a to be
+        # outer joined, the second filter makes it inner join again.
         qs = BaseA.objects.filter(
             Q(a__f1='foo') | Q(b__f2='foo')).filter(a__f2='bar')
         self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
         self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
 
-    @unittest.expectedFailure
-    def test_disjunction_promotion4_failing(self):
-        # Failure because no join repromotion
+    def test_disjunction_promotion4_demote(self):
         qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
         self.assertEqual(str(qs.query).count('JOIN'), 0)
+        # Demote needed for the "a" join. It is marked as outer join by
+        # above filter (even if it is trimmed away).
         qs = qs.filter(a__f1='foo')
         self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
 
@@ -2810,9 +2816,8 @@ class DisjunctionPromotionTests(TestCase):
         qs = qs.filter(Q(a=1) | Q(a=2))
         self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
 
-    @unittest.expectedFailure
-    def test_disjunction_promotion5_failing(self):
-        # Failure because no join repromotion logic.
+    def test_disjunction_promotion5_demote(self):
+        # Failure because no join demotion logic for this case.
         qs = BaseA.objects.filter(Q(a=1) | Q(a=2))
         # Note that the above filters on a force the join to an
         # inner join even if it is trimmed.
@@ -2823,8 +2828,8 @@ class DisjunctionPromotionTests(TestCase):
         self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
         qs = BaseA.objects.filter(Q(a__f1='foo') | Q(b__f1='foo'))
         # Now the join to a is created as LOUTER
-        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 0)
-        qs = qs.objects.filter(Q(a=1) | Q(a=2))
+        self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2)
+        qs = qs.filter(Q(a=1) | Q(a=2))
         self.assertEqual(str(qs.query).count('INNER JOIN'), 1)
         self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1)
 
@@ -3079,3 +3084,17 @@ class Ticket21203Tests(TestCase):
         qs = Ticket21203Child.objects.select_related('parent').defer('parent__created')
         self.assertQuerysetEqual(qs, [c], lambda x: x)
         self.assertIs(qs[0].parent.parent_bool, True)
+
+class ValuesJoinPromotionTests(TestCase):
+    def test_values_no_promotion_for_existing(self):
+        qs = Node.objects.filter(parent__parent__isnull=False)
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
+        qs = qs.values('parent__parent__id')
+        self.assertTrue(' INNER JOIN ' in str(qs.query))
+        # Make sure there is a left outer join without the filter.
+        qs = Node.objects.values('parent__parent__id')
+        self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query))
+
+    def test_non_nullable_fk_not_promoted(self):
+        qs = ObjectB.objects.values('objecta__name')
+        self.assertTrue(' INNER JOIN ' in str(qs.query))