From 7b8fa1653fde578ab3a496d9974ab1d4261b8b26 Mon Sep 17 00:00:00 2001
From: Mariusz Felisiak <felisiak.mariusz@gmail.com>
Date: Mon, 2 Mar 2020 13:20:36 +0100
Subject: [PATCH] Fixed #31150 -- Included subqueries that reference related
 fields in GROUP BY clauses.

Thanks Johannes Hoppe for the report.

Regression in fb3f034f1c63160c0ff13c609acd01c18be12f80.

Co-authored-by: Simon Charette <charette.s@gmail.com>
---
 django/db/models/expressions.py | 15 ++++++++++++++-
 docs/releases/3.0.4.txt         |  3 +++
 tests/aggregation/tests.py      | 27 +++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/django/db/models/expressions.py b/django/db/models/expressions.py
index a165923784..f773e99a0f 100644
--- a/django/db/models/expressions.py
+++ b/django/db/models/expressions.py
@@ -6,6 +6,7 @@ from decimal import Decimal
 from django.core.exceptions import EmptyResultSet, FieldError
 from django.db import NotSupportedError, connection
 from django.db.models import fields
+from django.db.models.constants import LOOKUP_SEP
 from django.db.models.query_utils import Q
 from django.utils.deconstruct import deconstructible
 from django.utils.functional import cached_property
@@ -559,6 +560,14 @@ class ResolvedOuterRef(F):
             'only be used in a subquery.'
         )
 
+    def resolve_expression(self, *args, **kwargs):
+        col = super().resolve_expression(*args, **kwargs)
+        # FIXME: Rename possibly_multivalued to multivalued and fix detection
+        # for non-multivalued JOINs (e.g. foreign key fields). This should take
+        # into account only many-to-many and one-to-many relationships.
+        col.possibly_multivalued = LOOKUP_SEP in self.name
+        return col
+
     def relabeled_clone(self, relabels):
         return self
 
@@ -747,6 +756,7 @@ class Random(Expression):
 class Col(Expression):
 
     contains_column_references = True
+    possibly_multivalued = False
 
     def __init__(self, alias, target, output_field=None):
         if output_field is None:
@@ -1042,7 +1052,10 @@ class Subquery(Expression):
     def get_group_by_cols(self, alias=None):
         if alias:
             return [Ref(alias, self)]
-        return self.query.get_external_cols()
+        external_cols = self.query.get_external_cols()
+        if any(col.possibly_multivalued for col in external_cols):
+            return [self]
+        return external_cols
 
 
 class Exists(Subquery):
diff --git a/docs/releases/3.0.4.txt b/docs/releases/3.0.4.txt
index 1da9a22552..ad9752addb 100644
--- a/docs/releases/3.0.4.txt
+++ b/docs/releases/3.0.4.txt
@@ -27,3 +27,6 @@ Bugfixes
 * Fixed a regression in Django 3.0.3 that caused misplacing parameters of SQL
   queries when subtracting ``DateField`` or ``DateTimeField`` expressions on
   MySQL (:ticket:`31312`).
+
+* Fixed a regression in Django 3.0 that didn't include subqueries spanning
+  multivalued relations in the ``GROUP BY`` clause (:ticket:`31150`).
diff --git a/tests/aggregation/tests.py b/tests/aggregation/tests.py
index ed2125c0a9..439a398b76 100644
--- a/tests/aggregation/tests.py
+++ b/tests/aggregation/tests.py
@@ -1,6 +1,7 @@
 import datetime
 import re
 from decimal import Decimal
+from unittest import skipIf
 
 from django.core.exceptions import FieldError
 from django.db import connection
@@ -1190,6 +1191,26 @@ class AggregateTestCase(TestCase):
             },
         ])
 
+    @skipUnlessDBFeature('supports_subqueries_in_group_by')
+    @skipIf(
+        connection.vendor == 'mysql',
+        'GROUP BY optimization does not work properly when ONLY_FULL_GROUP_BY '
+        'mode is enabled on MySQL, see #31331.',
+    )
+    def test_aggregation_subquery_annotation_multivalued(self):
+        """
+        Subquery annotations must be included in the GROUP BY if they use
+        potentially multivalued relations (contain the LOOKUP_SEP).
+        """
+        subquery_qs = Author.objects.filter(
+            pk=OuterRef('pk'),
+            book__name=OuterRef('book__name'),
+        ).values('pk')
+        author_qs = Author.objects.annotate(
+            subquery_id=Subquery(subquery_qs),
+        ).annotate(count=Count('book'))
+        self.assertEqual(author_qs.count(), Author.objects.count())
+
     def test_aggregation_order_by_not_selected_annotation_values(self):
         result_asc = [
             self.b4.pk,
@@ -1248,6 +1269,7 @@ class AggregateTestCase(TestCase):
         ).annotate(total=Count('*'))
         self.assertEqual(dict(has_long_books_breakdown), {True: 2, False: 3})
 
+    @skipUnlessDBFeature('supports_subqueries_in_group_by')
     def test_aggregation_subquery_annotation_related_field(self):
         publisher = Publisher.objects.create(name=self.a9.name, num_awards=2)
         book = Book.objects.create(
@@ -1267,3 +1289,8 @@ class AggregateTestCase(TestCase):
             contact_publisher__isnull=False,
         ).annotate(count=Count('authors'))
         self.assertSequenceEqual(books_qs, [book])
+        # FIXME: GROUP BY doesn't need to include a subquery with
+        # non-multivalued JOINs, see Col.possibly_multivalued (refs #31150):
+        # with self.assertNumQueries(1) as ctx:
+        #     self.assertSequenceEqual(books_qs, [book])
+        # self.assertEqual(ctx[0]['sql'].count('SELECT'), 2)