1
0
mirror of https://github.com/django/django.git synced 2025-06-03 10:39:12 +00:00

Fixed #34262 -- Added support for AnyValue for SQLite, MySQL, Oracle, and Postgresql 16+.

Thanks Simon Charette for the guidance and review. Thanks Tim Schilling for the
documentation review. Thanks David Wobrock for investigation and solution proposals.
This commit is contained in:
ontowhee 2025-03-15 19:23:28 -07:00 committed by Sarah Boyce
parent f603ece016
commit ddb8529415
11 changed files with 212 additions and 11 deletions

View File

@ -266,6 +266,9 @@ class BaseDatabaseFeatures:
# delimiter along with DISTINCT.
supports_aggregate_distinct_multiple_argument = True
# Does the database support SQL 2023 ANY_VALUE in GROUP BY?
supports_any_value = False
# Does the backend support indexing a TextField?
supports_index_on_text_field = True

View File

@ -111,16 +111,6 @@ class DatabaseFeatures(BaseDatabaseFeatures):
},
}
)
if "ONLY_FULL_GROUP_BY" in self.connection.sql_mode:
skips.update(
{
"GROUP BY cannot contain nonaggregated column when "
"ONLY_FULL_GROUP_BY mode is enabled on MySQL, see #34262.": {
"aggregation.tests.AggregateTestCase."
"test_group_by_nested_expression_with_params",
},
}
)
if self.connection.mysql_version < (8, 0, 31):
skips.update(
{
@ -297,3 +287,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
if self.connection.mysql_is_mariadb:
return "ONLY_FULL_GROUP_BY" not in self.connection.sql_mode
return True
@cached_property
def supports_any_value(self):
return not self.connection.mysql_is_mariadb

View File

@ -61,6 +61,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
END;
"""
supports_callproc_kwargs = True
supports_any_value = True
supports_over_clause = True
supports_frame_range_fixed_distance = True
supports_ignore_conflicts = False

View File

@ -162,3 +162,5 @@ class DatabaseFeatures(BaseDatabaseFeatures):
supports_nulls_distinct_unique_constraints = property(
operator.attrgetter("is_postgresql_15")
)
supports_any_value = property(operator.attrgetter("is_postgresql_16"))

View File

@ -80,6 +80,7 @@ def register(connection):
connection.create_aggregate("STDDEV_SAMP", 1, StdDevSamp)
connection.create_aggregate("VAR_POP", 1, VarPop)
connection.create_aggregate("VAR_SAMP", 1, VarSamp)
connection.create_aggregate("ANY_VALUE", 1, AnyValue)
# Some math functions are enabled by default in SQLite 3.35+.
sql = "select sqlite_compileoption_used('ENABLE_MATH_FUNCTIONS')"
if not connection.execute(sql).fetchone()[0]:
@ -513,3 +514,8 @@ class VarPop(ListAggregate):
class VarSamp(ListAggregate):
finalize = statistics.variance
class AnyValue(ListAggregate):
def finalize(self):
return self[0]

View File

@ -36,6 +36,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
supports_aggregate_filter_clause = True
supports_aggregate_order_by_clause = Database.sqlite_version_info >= (3, 44, 0)
supports_aggregate_distinct_multiple_argument = False
supports_any_value = True
order_by_nulls_first = True
supports_json_field_contains = False
supports_update_conflicts = True

View File

@ -22,6 +22,7 @@ from django.db.models.functions.mixins import (
__all__ = [
"Aggregate",
"AnyValue",
"Avg",
"Count",
"Max",
@ -229,6 +230,20 @@ class Aggregate(Func):
return options
class AnyValue(Aggregate):
function = "ANY_VALUE"
name = "AnyValue"
arity = 1
window_compatible = False
def as_sql(self, compiler, connection, **extra_context):
if not connection.features.supports_any_value:
raise NotSupportedError(
"ANY_VALUE is not supported on this database backend."
)
return super().as_sql(compiler, connection, **extra_context)
class Avg(FixDurationInputMixin, NumericOutputFieldMixin, Aggregate):
function = "AVG"
name = "Avg"

View File

@ -3943,6 +3943,60 @@ when the queryset (or grouping) contains no entries.
Keyword arguments that can provide extra context for the SQL generated
by the aggregate.
``AnyValue``
~~~~~~~~~~~~
.. versionadded:: 6.0
.. class:: AnyValue(expression, output_field=None, filter=None, default=None, **extra)
Returns an arbitrary value from the non-null input values.
* Default alias: ``<field>__anyvalue``
* Return type: same as input field, or ``output_field`` if supplied. If the
queryset or grouping is empty, ``default`` is returned.
Usage example:
.. code-block:: pycon
>>> # Get average rating for each year along with a sample headline
>>> # from that year.
>>> from django.db.models import AnyValue, Avg, F, Q
>>> sample_headline = AnyValue("headline")
>>> Entry.objects.values(
... pub_year=F("pub_date__year"),
... ).annotate(
... avg_rating=Avg("rating"),
... sample_headline=sample_headline,
... )
>>> # Get a sample headline from each year with rating greater than 4.5.
>>> sample_headline = AnyValue(
... "headline",
... filter=Q(rating__gt=4.5),
... )
>>> Entry.objects.values(
... pub_year=F("pub_date__year"),
... ).annotate(
... avg_rating=Avg("rating"),
... sample_headline=sample_headline,
... )
Supported on SQLite, MySQL, Oracle, and PostgreSQL 16+.
.. admonition:: MySQL with ``ONLY_FULL_GROUP_BY`` enabled
When the ``ONLY_FULL_GROUP_BY`` SQL mode is enabled on MySQL it may be
necessary to use ``AnyValue`` if an aggregation includes a mix of
aggregate and non-aggregate functions. Using ``AnyValue`` allows the
non-aggregate function to be referenced in the select list when
database cannot determine that it is functionally dependent on the
columns in the `group by`_ clause. See the :ref:`aggregation
documentation <aggregation-mysql-only-full-group-by>` for more details.
.. _group by: https://dev.mysql.com/doc/refman/8.4/en/group-by-handling.html
``Avg``
~~~~~~~

View File

@ -212,6 +212,10 @@ Models
* :class:`~django.db.models.JSONField` now supports
:ref:`negative array indexing <key-index-and-path-transforms>` on SQLite.
* The new :class:`~django.db.models.AnyValue` aggregate returns an arbitrary
value from the non-null input values. This is supported on SQLite, MySQL,
Oracle, and PostgreSQL 16+.
Pagination
~~~~~~~~~~

View File

@ -679,3 +679,65 @@ no books can be found:
Under the hood, the :ref:`default <aggregate-default>` argument is implemented
by wrapping the aggregate function with
:class:`~django.db.models.functions.Coalesce`.
.. _aggregation-mysql-only-full-group-by:
Aggregating with MySQL ``ONLY_FULL_GROUP_BY`` enabled
-----------------------------------------------------
When using the ``values()`` clause to group query results for annotations in
MySQL with the ``ONLY_FULL_GROUP_BY`` SQL mode enabled, you may need to apply
:class:`~django.db.models.AnyValue` if the annotation includes a mix of
aggregate and non-aggregate expressions.
Take the following example:
.. code-block:: pycon
>>> from django.db.models import F, Count, Greatest
>>> Book.objects.values(greatest_pages=Greatest("pages", 600)).annotate(
... num_authors=Count("authors"),
... pages_per_author=F("greatest_pages") / F("num_authors"),
... ).aggregate(Avg("pages_per_author"))
This creates groups of books based on the SQL column ``GREATEST(pages, 600)``.
One unique group consists of books with 600 pages or less, and other unique
groups will consist of books with the same pages. The ``pages_per_author``
annotation is composed of aggregate and non-aggregate expressions,
``num_authors`` is an aggregate expression while ``greatest_page`` isn't.
Since the grouping is based on the ``greatest_pages`` expression, MySQL may be
unable to determine that ``greatest_pages`` (used in the ``pages_per_author``
expression) is functionally dependent on the grouped column. As a result, it
may raise an error like:
.. code-block:: pytb
OperationalError: (1055, "Expression #2 of SELECT list is not in GROUP BY
clause and contains nonaggregated column 'book_book.pages' which is not
functionally dependent on columns in GROUP BY clause; this is incompatible
with sql_mode=only_full_group_by")
To avoid this, you can wrap the non-aggregate expression with
:class:`~django.db.models.AnyValue`.
.. code-block:: pycon
>>> from django.db.models import F, Count, Greatest
>>> Book.objects.values(
... greatest_pages=Greatest("pages", 600),
... ).annotate(
... num_authors=Count("authors"),
... pages_per_author=AnyValue(F("greatest_pages")) / F("num_authors"),
... ).aggregate(Avg("pages_per_author"))
{'pages_per_author__avg': 532.57143333}
Other supported databases do not encounter the ``OperationalError`` in the
example above because they can detect the functional dependency. In general,
``AnyValue`` is useful when dealing with select list columns that involve
non-aggregate functions or complex expressions not recognized by the database
as functionally dependent on the columns in the grouping clause.
.. versionchanged:: 6.0
The :class:`~django.db.models.AnyValue` aggregate was added.

View File

@ -6,6 +6,7 @@ from decimal import Decimal
from django.core.exceptions import FieldError
from django.db import NotSupportedError, connection
from django.db.models import (
AnyValue,
Avg,
Case,
CharField,
@ -1662,6 +1663,10 @@ class AggregateTestCase(TestCase):
self.assertEqual(dict(has_long_books_breakdown), {True: 2, False: 3})
def test_group_by_nested_expression_with_params(self):
greatest_pages_param = "greatest_pages"
if connection.vendor == "mysql" and connection.features.supports_any_value:
greatest_pages_param = AnyValue("greatest_pages")
books_qs = (
Book.objects.annotate(greatest_pages=Greatest("pages", Value(600)))
.values(
@ -1669,12 +1674,66 @@ class AggregateTestCase(TestCase):
)
.annotate(
min_pages=Min("pages"),
least=Least("min_pages", "greatest_pages"),
least=Least("min_pages", greatest_pages_param),
)
.values_list("least", flat=True)
)
self.assertCountEqual(books_qs, [300, 946, 1132])
@skipUnlessDBFeature("supports_any_value")
def test_any_value(self):
books_qs = (
Book.objects.values(greatest_pages=Greatest("pages", 600))
.annotate(
pubdate_year=AnyValue("pubdate__year"),
)
.values_list("pubdate_year", flat=True)
.order_by("pubdate_year")
)
self.assertCountEqual(books_qs[0:2], [1991, 1995])
self.assertIn(books_qs[2], [2007, 2008])
@skipUnlessDBFeature("supports_any_value")
def test_any_value_filter(self):
books_qs = (
Book.objects.values(greatest_pages=Greatest("pages", 600))
.annotate(
pubdate_year=AnyValue("pubdate__year", filter=Q(rating__lte=4.5)),
)
.values_list("pubdate_year", flat=True)
)
self.assertCountEqual(books_qs, [2007, 1995, None])
@skipUnlessDBFeature("supports_any_value")
def test_any_value_aggregate_clause(self):
books_qs = (
Book.objects.values(greatest_pages=Greatest("pages", 600))
.annotate(
num_authors=Count("authors"),
pages_per_author=(
AnyValue("greatest_pages") / (Cast("num_authors", FloatField()))
),
)
.values_list("pages_per_author", flat=True)
.order_by("pages_per_author")
)
self.assertAlmostEqual(books_qs[0], 600 / 7, places=4)
self.assertAlmostEqual(books_qs[1], 1132 / 2, places=4)
self.assertAlmostEqual(books_qs[2], 946 / 1, places=4)
aggregate_qs = books_qs.aggregate(Avg("pages_per_author"))
self.assertAlmostEqual(
aggregate_qs["pages_per_author__avg"],
((600 / 7) + (1132 / 2) + (946 / 1)) / 3,
places=4,
)
@skipIfDBFeature("supports_any_value")
def test_any_value_not_supported(self):
message = "ANY_VALUE is not supported on this database backend."
with self.assertRaisesMessage(NotSupportedError, message):
Book.objects.aggregate(AnyValue("rating"))
@skipUnlessDBFeature("supports_subqueries_in_group_by")
def test_aggregation_subquery_annotation_related_field(self):
publisher = Publisher.objects.create(name=self.a9.name, num_awards=2)