From 923f78f5046bd35f5063aa06c1cfeff013317636 Mon Sep 17 00:00:00 2001
From: Malcolm Tredinnick <malcolm.tredinnick@gmail.com>
Date: Fri, 6 Mar 2009 02:02:09 +0000
Subject: [PATCH] Improved table join handling for comparisons against NULL.

This fixes a broad class of bugs involving filters that look for missing
related models and fields. Most of them don't seem to have been reported
(the added tests cover the root cause). The exception is that this has
also fixed #9868.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@9979 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/db/models/sql/expressions.py           |  2 +-
 django/db/models/sql/query.py                 | 53 ++++++++++++-------
 tests/regressiontests/null_queries/models.py  | 23 ++++++++
 .../one_to_one_regress/models.py              | 22 ++++++++
 4 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/django/db/models/sql/expressions.py b/django/db/models/sql/expressions.py
index f011db2165..920cbffe73 100644
--- a/django/db/models/sql/expressions.py
+++ b/django/db/models/sql/expressions.py
@@ -42,7 +42,7 @@ class SQLEvaluator(object):
                 field, source, opts, join_list, last, _ = query.setup_joins(
                     field_list, query.get_meta(),
                     query.get_initial_alias(), False)
-                _, _, col, _, join_list = query.trim_joins(source, join_list, last, False)
+                col, _, join_list = query.trim_joins(source, join_list, last, False)
 
                 self.cols[node] = (join_list[-1], col)
             except FieldDoesNotExist:
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
index fb07529ee2..7c2816f715 100644
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@@ -1287,7 +1287,7 @@ class BaseQuery(object):
                 field_list, opts, self.get_initial_alias(), False)
 
             # Process the join chain to see if it can be trimmed
-            _, _, col, _, join_list = self.trim_joins(source, join_list, last, False)
+            col, _, join_list = self.trim_joins(source, join_list, last, False)
 
             # If the aggregate references a model or field that requires a join,
             # those joins must be LEFT OUTER - empty join rows must be returned
@@ -1392,16 +1392,16 @@ class BaseQuery(object):
                     can_reuse)
             return
 
-        # Process the join chain to see if it can be trimmed
-        final, penultimate, col, alias, join_list = self.trim_joins(target, join_list, last, trim)
-
         if (lookup_type == 'isnull' and value is True and not negate and
-                final > 1):
-            # If the comparison is against NULL, we need to use a left outer
-            # join when connecting to the previous model. We make that
-            # adjustment here. We don't do this unless needed as it's less
-            # efficient at the database level.
-            self.promote_alias(join_list[penultimate])
+                len(join_list) > 1):
+            # If the comparison is against NULL, we may need to use some left
+            # outer joins when creating the join chain. This is only done when
+            # needed, as it's less efficient at the database level.
+            self.promote_alias_chain(join_list)
+
+        # Process the join list to see if we can remove any inner joins from
+        # the far end (fewer tables in a query is better).
+        col, alias, join_list = self.trim_joins(target, join_list, last, trim)
 
         if connector == OR:
             # Some joins may need to be promoted when adding a new filter to a
@@ -1436,7 +1436,7 @@ class BaseQuery(object):
         if negate:
             self.promote_alias_chain(join_list)
             if lookup_type != 'isnull':
-                if final > 1:
+                if len(join_list) > 1:
                     for alias in join_list:
                         if self.alias_map[alias][JOIN_TYPE] == self.LOUTER:
                             j_col = self.alias_map[alias][RHS_JOIN_COL]
@@ -1699,13 +1699,28 @@ class BaseQuery(object):
         return field, target, opts, joins, last, extra_filters
 
     def trim_joins(self, target, join_list, last, trim):
-        """An optimization: if the final join is against the same column as
-        we are comparing against, we can go back one step in a join
-        chain and compare against the LHS of the join instead (and then
-        repeat the optimization). The result, potentially, involves less
-        table joins.
+        """
+        Sometimes joins at the end of a multi-table sequence can be trimmed. If
+        the final join is against the same column as we are comparing against,
+        and is an inner join, we can go back one step in a join chain and
+        compare against the LHS of the join instead (and then repeat the
+        optimization). The result, potentially, involves less table joins.
 
-        Returns a tuple
+        The 'target' parameter is the final field being joined to, 'join_list'
+        is the full list of join aliases.
+
+        The 'last' list contains offsets into 'join_list', corresponding to
+        each component of the filter.  Many-to-many relations, for example, add
+        two tables to the join list and we want to deal with both tables the
+        same way, so 'last' has an entry for the first of the two tables and
+        then the table immediately after the second table, in that case.
+
+        The 'trim' parameter forces the final piece of the join list to be
+        trimmed before anything. See the documentation of add_filter() for
+        details about this.
+
+        Returns the final active column and table alias and the new active
+        join_list.
         """
         final = len(join_list)
         penultimate = last.pop()
@@ -1724,7 +1739,7 @@ class BaseQuery(object):
         alias = join_list[-1]
         while final > 1:
             join = self.alias_map[alias]
-            if col != join[RHS_JOIN_COL]:
+            if col != join[RHS_JOIN_COL] or join[JOIN_TYPE] != self.INNER:
                 break
             self.unref_alias(alias)
             alias = join[LHS_ALIAS]
@@ -1733,7 +1748,7 @@ class BaseQuery(object):
             final -= 1
             if final == penultimate:
                 penultimate = last.pop()
-        return final, penultimate, col, alias, join_list
+        return col, alias, join_list
 
     def update_dupe_avoidance(self, opts, col, alias):
         """
diff --git a/tests/regressiontests/null_queries/models.py b/tests/regressiontests/null_queries/models.py
index df04f14eba..7cde0aac61 100644
--- a/tests/regressiontests/null_queries/models.py
+++ b/tests/regressiontests/null_queries/models.py
@@ -13,6 +13,17 @@ class Choice(models.Model):
     def __unicode__(self):
         return u"Choice: %s in poll %s" % (self.choice, self.poll)
 
+# A set of models with an inner one pointing to two outer ones.
+class OuterA(models.Model):
+    pass
+
+class OuterB(models.Model):
+    data = models.CharField(max_length=10)
+
+class Inner(models.Model):
+    first = models.ForeignKey(OuterA)
+    second = models.ForeignKey(OuterB, null=True)
+
 __test__ = {'API_TESTS':"""
 # Regression test for the use of None as a query value. None is interpreted as
 # an SQL NULL, but only in __exact queries.
@@ -56,4 +67,16 @@ ValueError: Cannot use None as a query value
 >>> p2.choice_set.all()
 []
 
+# Querying across reverse relations and then another relation should insert
+# outer joins correctly so as not to exclude results.
+>>> obj = OuterA.objects.create()
+>>> OuterA.objects.filter(inner__second=None)
+[<OuterA: OuterA object>]
+>>> OuterA.objects.filter(inner__second__data=None)
+[<OuterA: OuterA object>]
+>>> _ = Inner.objects.create(first=obj)
+>>> Inner.objects.filter(first__inner__second=None)
+[<Inner: Inner object>]
+
+
 """}
diff --git a/tests/regressiontests/one_to_one_regress/models.py b/tests/regressiontests/one_to_one_regress/models.py
index 66d00d7223..5c89c55f45 100644
--- a/tests/regressiontests/one_to_one_regress/models.py
+++ b/tests/regressiontests/one_to_one_regress/models.py
@@ -33,6 +33,15 @@ class Favorites(models.Model):
     def __unicode__(self):
         return u"Favorites for %s" % self.name
 
+class Target(models.Model):
+    pass
+
+class Pointer(models.Model):
+    other = models.OneToOneField(Target, primary_key=True)
+
+class Pointer2(models.Model):
+    other = models.OneToOneField(Target)
+
 __test__ = {'API_TESTS':"""
 # Regression test for #1064 and #1506: Check that we create models via the m2m
 # relation if the remote model has a OneToOneField.
@@ -119,4 +128,17 @@ False
 >>> r.place == p
 True
 
+# Regression test for #9968: filtering reverse one-to-one relations with
+# primary_key=True was misbehaving. We test both (primary_key=True & False)
+# cases here to prevent any reappearance of the problem.
+>>> _ = Target.objects.create()
+>>> Target.objects.filter(pointer=None)
+[<Target: Target object>]
+>>> Target.objects.exclude(pointer=None)
+[]
+>>> Target.objects.filter(pointer2=None)
+[<Target: Target object>]
+>>> Target.objects.exclude(pointer2=None)
+[]
+
 """}