mirror of
				https://github.com/django/django.git
				synced 2025-10-26 15:16:09 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1199 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			1199 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| =================
 | |
| Query Expressions
 | |
| =================
 | |
| 
 | |
| .. currentmodule:: django.db.models
 | |
| 
 | |
| Query expressions describe a value or a computation that can be used as part of
 | |
| an update, create, filter, order by, annotation, or aggregate. When an
 | |
| expression outputs a boolean value, it may be used directly in filters. There
 | |
| are a number of built-in expressions (documented below) that can be used to
 | |
| help you write queries. Expressions can be combined, or in some cases nested,
 | |
| to form more complex computations.
 | |
| 
 | |
| Supported arithmetic
 | |
| ====================
 | |
| 
 | |
| Django supports negation, addition, subtraction, multiplication, division,
 | |
| modulo arithmetic, and the power operator on query expressions, using Python
 | |
| constants, variables, and even other expressions.
 | |
| 
 | |
| Some examples
 | |
| =============
 | |
| 
 | |
| .. code-block:: python
 | |
| 
 | |
|     from django.db.models import Count, F, Value
 | |
|     from django.db.models.functions import Length, Upper
 | |
| 
 | |
|     # Find companies that have more employees than chairs.
 | |
|     Company.objects.filter(num_employees__gt=F('num_chairs'))
 | |
| 
 | |
|     # Find companies that have at least twice as many employees
 | |
|     # as chairs. Both the querysets below are equivalent.
 | |
|     Company.objects.filter(num_employees__gt=F('num_chairs') * 2)
 | |
|     Company.objects.filter(
 | |
|         num_employees__gt=F('num_chairs') + F('num_chairs'))
 | |
| 
 | |
|     # How many chairs are needed for each company to seat all employees?
 | |
|     >>> company = Company.objects.filter(
 | |
|     ...    num_employees__gt=F('num_chairs')).annotate(
 | |
|     ...    chairs_needed=F('num_employees') - F('num_chairs')).first()
 | |
|     >>> company.num_employees
 | |
|     120
 | |
|     >>> company.num_chairs
 | |
|     50
 | |
|     >>> company.chairs_needed
 | |
|     70
 | |
| 
 | |
|     # Create a new company using expressions.
 | |
|     >>> company = Company.objects.create(name='Google', ticker=Upper(Value('goog')))
 | |
|     # Be sure to refresh it if you need to access the field.
 | |
|     >>> company.refresh_from_db()
 | |
|     >>> company.ticker
 | |
|     'GOOG'
 | |
| 
 | |
|     # Annotate models with an aggregated value. Both forms
 | |
|     # below are equivalent.
 | |
|     Company.objects.annotate(num_products=Count('products'))
 | |
|     Company.objects.annotate(num_products=Count(F('products')))
 | |
| 
 | |
|     # Aggregates can contain complex computations also
 | |
|     Company.objects.annotate(num_offerings=Count(F('products') + F('services')))
 | |
| 
 | |
|     # Expressions can also be used in order_by(), either directly
 | |
|     Company.objects.order_by(Length('name').asc())
 | |
|     Company.objects.order_by(Length('name').desc())
 | |
|     # or using the double underscore lookup syntax.
 | |
|     from django.db.models import CharField
 | |
|     from django.db.models.functions import Length
 | |
|     CharField.register_lookup(Length)
 | |
|     Company.objects.order_by('name__length')
 | |
| 
 | |
|     # Boolean expression can be used directly in filters.
 | |
|     from django.db.models import Exists
 | |
|     Company.objects.filter(
 | |
|         Exists(Employee.objects.filter(company=OuterRef('pk'), salary__gt=10))
 | |
|     )
 | |
| 
 | |
| Built-in Expressions
 | |
| ====================
 | |
| 
 | |
| .. note::
 | |
| 
 | |
|     These expressions are defined in ``django.db.models.expressions`` and
 | |
|     ``django.db.models.aggregates``, but for convenience they're available and
 | |
|     usually imported from :mod:`django.db.models`.
 | |
| 
 | |
| ``F()`` expressions
 | |
| -------------------
 | |
| 
 | |
| .. class:: F
 | |
| 
 | |
| An ``F()`` object represents the value of a model field or annotated column. It
 | |
| makes it possible to refer to model field values and perform  database
 | |
| operations using them without actually having to pull them out of the  database
 | |
| into Python memory.
 | |
| 
 | |
| Instead, Django uses the ``F()`` object to generate an SQL expression that
 | |
| describes the required operation at the database level.
 | |
| 
 | |
| Let's try this with an example. Normally, one might do something like this::
 | |
| 
 | |
|     # Tintin filed a news story!
 | |
|     reporter = Reporters.objects.get(name='Tintin')
 | |
|     reporter.stories_filed += 1
 | |
|     reporter.save()
 | |
| 
 | |
| Here, we have pulled the value of ``reporter.stories_filed`` from the database
 | |
| into memory and manipulated it using familiar Python operators, and then saved
 | |
| the object back to the database. But instead we could also have done::
 | |
| 
 | |
|     from django.db.models import F
 | |
| 
 | |
|     reporter = Reporters.objects.get(name='Tintin')
 | |
|     reporter.stories_filed = F('stories_filed') + 1
 | |
|     reporter.save()
 | |
| 
 | |
| Although ``reporter.stories_filed = F('stories_filed') + 1`` looks like a
 | |
| normal Python assignment of value to an instance attribute, in fact it's an SQL
 | |
| construct describing an operation on the database.
 | |
| 
 | |
| When Django encounters an instance of ``F()``, it overrides the standard Python
 | |
| operators to create an encapsulated SQL expression; in this case, one which
 | |
| instructs the database to increment the database field represented by
 | |
| ``reporter.stories_filed``.
 | |
| 
 | |
| Whatever value is or was on ``reporter.stories_filed``, Python never gets to
 | |
| know about it - it is dealt with entirely by the database. All Python does,
 | |
| through Django's ``F()`` class, is create the SQL syntax to refer to the field
 | |
| and describe the operation.
 | |
| 
 | |
| To access the new value saved this way, the object must be reloaded::
 | |
| 
 | |
|    reporter = Reporters.objects.get(pk=reporter.pk)
 | |
|    # Or, more succinctly:
 | |
|    reporter.refresh_from_db()
 | |
| 
 | |
| As well as being used in operations on single instances as above, ``F()`` can
 | |
| be used on ``QuerySets`` of object instances, with ``update()``. This reduces
 | |
| the two queries we were using above - the ``get()`` and the
 | |
| :meth:`~Model.save()` - to just one::
 | |
| 
 | |
|     reporter = Reporters.objects.filter(name='Tintin')
 | |
|     reporter.update(stories_filed=F('stories_filed') + 1)
 | |
| 
 | |
| We can also use :meth:`~django.db.models.query.QuerySet.update()` to increment
 | |
| the field value on multiple objects - which could be very much faster than
 | |
| pulling them all into Python from the database, looping over them, incrementing
 | |
| the field value of each one, and saving each one back to the database::
 | |
| 
 | |
|     Reporter.objects.all().update(stories_filed=F('stories_filed') + 1)
 | |
| 
 | |
| ``F()`` therefore can offer performance advantages by:
 | |
| 
 | |
| * getting the database, rather than Python, to do work
 | |
| * reducing the number of queries some operations require
 | |
| 
 | |
| .. _avoiding-race-conditions-using-f:
 | |
| 
 | |
| Avoiding race conditions using ``F()``
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| Another useful benefit of ``F()`` is that having the database - rather than
 | |
| Python - update a field's value avoids a *race condition*.
 | |
| 
 | |
| If two Python threads execute the code in the first example above, one thread
 | |
| could retrieve, increment, and save a field's value after the other has
 | |
| retrieved it from the database. The value that the second thread saves will be
 | |
| based on the original value; the work of the first thread will be lost.
 | |
| 
 | |
| If the database is responsible for updating the field, the process is more
 | |
| robust: it will only ever update the field based on the value of the field in
 | |
| the database when the :meth:`~Model.save()` or ``update()`` is executed, rather
 | |
| than based on its value when the instance was retrieved.
 | |
| 
 | |
| ``F()`` assignments persist after ``Model.save()``
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| ``F()`` objects assigned to model fields persist after saving the model
 | |
| instance and will be applied on each :meth:`~Model.save()`. For example::
 | |
| 
 | |
|     reporter = Reporters.objects.get(name='Tintin')
 | |
|     reporter.stories_filed = F('stories_filed') + 1
 | |
|     reporter.save()
 | |
| 
 | |
|     reporter.name = 'Tintin Jr.'
 | |
|     reporter.save()
 | |
| 
 | |
| ``stories_filed`` will be updated twice in this case. If it's initially ``1``,
 | |
| the final value will be ``3``. This persistence can be avoided by reloading the
 | |
| model object after saving it, for example, by using
 | |
| :meth:`~Model.refresh_from_db()`.
 | |
| 
 | |
| Using ``F()`` in filters
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| ``F()`` is also very useful in ``QuerySet`` filters, where they make it
 | |
| possible to filter a set of objects against criteria based on their field
 | |
| values, rather than on Python values.
 | |
| 
 | |
| This is documented in :ref:`using F() expressions in queries
 | |
| <using-f-expressions-in-filters>`.
 | |
| 
 | |
| .. _using-f-with-annotations:
 | |
| 
 | |
| Using ``F()`` with annotations
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| ``F()`` can be used to create dynamic fields on your models by combining
 | |
| different fields with arithmetic::
 | |
| 
 | |
|     company = Company.objects.annotate(
 | |
|         chairs_needed=F('num_employees') - F('num_chairs'))
 | |
| 
 | |
| If the fields that you're combining are of different types you'll need
 | |
| to tell Django what kind of field will be returned. Since ``F()`` does not
 | |
| directly support ``output_field`` you will need to wrap the expression with
 | |
| :class:`ExpressionWrapper`::
 | |
| 
 | |
|     from django.db.models import DateTimeField, ExpressionWrapper, F
 | |
| 
 | |
|     Ticket.objects.annotate(
 | |
|         expires=ExpressionWrapper(
 | |
|             F('active_at') + F('duration'), output_field=DateTimeField()))
 | |
| 
 | |
| When referencing relational fields such as ``ForeignKey``, ``F()`` returns the
 | |
| primary key value rather than a model instance::
 | |
| 
 | |
|     >> car = Company.objects.annotate(built_by=F('manufacturer'))[0]
 | |
|     >> car.manufacturer
 | |
|     <Manufacturer: Toyota>
 | |
|     >> car.built_by
 | |
|     3
 | |
| 
 | |
| .. _using-f-to-sort-null-values:
 | |
| 
 | |
| Using ``F()`` to sort null values
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| Use ``F()`` and the ``nulls_first`` or ``nulls_last`` keyword argument to
 | |
| :meth:`.Expression.asc` or :meth:`~.Expression.desc` to control the ordering of
 | |
| a field's null values. By default, the ordering depends on your database.
 | |
| 
 | |
| For example, to sort companies that haven't been contacted (``last_contacted``
 | |
| is null) after companies that have been contacted::
 | |
| 
 | |
|     from django.db.models import F
 | |
|     Company.objects.order_by(F('last_contacted').desc(nulls_last=True))
 | |
| 
 | |
| .. _func-expressions:
 | |
| 
 | |
| ``Func()`` expressions
 | |
| ----------------------
 | |
| 
 | |
| ``Func()`` expressions are the base type of all expressions that involve
 | |
| database functions like ``COALESCE`` and ``LOWER``, or aggregates like ``SUM``.
 | |
| They can be used directly::
 | |
| 
 | |
|     from django.db.models import F, Func
 | |
| 
 | |
|     queryset.annotate(field_lower=Func(F('field'), function='LOWER'))
 | |
| 
 | |
| or they can be used to build a library of database functions::
 | |
| 
 | |
|     class Lower(Func):
 | |
|         function = 'LOWER'
 | |
| 
 | |
|     queryset.annotate(field_lower=Lower('field'))
 | |
| 
 | |
| But both cases will result in a queryset where each model is annotated with an
 | |
| extra attribute ``field_lower`` produced, roughly, from the following SQL:
 | |
| 
 | |
| .. code-block:: sql
 | |
| 
 | |
|     SELECT
 | |
|         ...
 | |
|         LOWER("db_table"."field") as "field_lower"
 | |
| 
 | |
| See :doc:`database-functions` for a list of built-in database functions.
 | |
| 
 | |
| The ``Func`` API is as follows:
 | |
| 
 | |
| .. class:: Func(*expressions, **extra)
 | |
| 
 | |
|     .. attribute:: function
 | |
| 
 | |
|         A class attribute describing the function that will be generated.
 | |
|         Specifically, the ``function`` will be interpolated as the ``function``
 | |
|         placeholder within :attr:`template`. Defaults to ``None``.
 | |
| 
 | |
|     .. attribute:: template
 | |
| 
 | |
|         A class attribute, as a format string, that describes the SQL that is
 | |
|         generated for this function. Defaults to
 | |
|         ``'%(function)s(%(expressions)s)'``.
 | |
| 
 | |
|         If you're constructing SQL like ``strftime('%W', 'date')`` and need a
 | |
|         literal ``%`` character in the query, quadruple it (``%%%%``) in the
 | |
|         ``template`` attribute because the string is interpolated twice: once
 | |
|         during the template interpolation in ``as_sql()`` and once in the SQL
 | |
|         interpolation with the query parameters in the database cursor.
 | |
| 
 | |
|     .. attribute:: arg_joiner
 | |
| 
 | |
|         A class attribute that denotes the character used to join the list of
 | |
|         ``expressions`` together. Defaults to ``', '``.
 | |
| 
 | |
|     .. attribute:: arity
 | |
| 
 | |
|         A class attribute that denotes the number of arguments the function
 | |
|         accepts. If this attribute is set and the function is called with a
 | |
|         different number of expressions, ``TypeError`` will be raised. Defaults
 | |
|         to ``None``.
 | |
| 
 | |
|     .. method:: as_sql(compiler, connection, function=None, template=None, arg_joiner=None, **extra_context)
 | |
| 
 | |
|         Generates the SQL for the database function.
 | |
| 
 | |
|         The ``as_vendor()`` methods should use the ``function``, ``template``,
 | |
|         ``arg_joiner``, and any other ``**extra_context`` parameters to
 | |
|         customize the SQL as needed. For example:
 | |
| 
 | |
|         .. code-block:: python
 | |
|             :caption: django/db/models/functions.py
 | |
| 
 | |
|             class ConcatPair(Func):
 | |
|                 ...
 | |
|                 function = 'CONCAT'
 | |
|                 ...
 | |
| 
 | |
|                 def as_mysql(self, compiler, connection, **extra_context):
 | |
|                     return super().as_sql(
 | |
|                         compiler, connection,
 | |
|                         function='CONCAT_WS',
 | |
|                         template="%(function)s('', %(expressions)s)",
 | |
|                         **extra_context
 | |
|                     )
 | |
| 
 | |
|         To avoid a SQL injection vulnerability, ``extra_context`` :ref:`must
 | |
|         not contain untrusted user input <avoiding-sql-injection-in-query-expressions>`
 | |
|         as these values are interpolated into the SQL string rather than passed
 | |
|         as query parameters, where the database driver would escape them.
 | |
| 
 | |
| The ``*expressions`` argument is a list of positional expressions that the
 | |
| function will be applied to. The expressions will be converted to strings,
 | |
| joined together with ``arg_joiner``, and then interpolated into the ``template``
 | |
| as the ``expressions`` placeholder.
 | |
| 
 | |
| Positional arguments can be expressions or Python values. Strings are
 | |
| assumed to be column references and will be wrapped in ``F()`` expressions
 | |
| while other values will be wrapped in ``Value()`` expressions.
 | |
| 
 | |
| The ``**extra`` kwargs are ``key=value`` pairs that can be interpolated
 | |
| into the ``template`` attribute. To avoid a SQL injection vulnerability,
 | |
| ``extra`` :ref:`must not contain untrusted user input
 | |
| <avoiding-sql-injection-in-query-expressions>` as these values are interpolated
 | |
| into the SQL string rather than passed as query parameters, where the database
 | |
| driver would escape them.
 | |
| 
 | |
| The ``function``, ``template``, and ``arg_joiner`` keywords can be used to
 | |
| replace the attributes of the same name without having to define your own
 | |
| class. ``output_field`` can be used to define the expected return type.
 | |
| 
 | |
| ``Aggregate()`` expressions
 | |
| ---------------------------
 | |
| 
 | |
| An aggregate expression is a special case of a :ref:`Func() expression
 | |
| <func-expressions>` that informs the query that a ``GROUP BY`` clause
 | |
| is required. All of the :ref:`aggregate functions <aggregation-functions>`,
 | |
| like ``Sum()`` and ``Count()``, inherit from ``Aggregate()``.
 | |
| 
 | |
| Since ``Aggregate``\s are expressions and wrap expressions, you can represent
 | |
| some complex computations::
 | |
| 
 | |
|     from django.db.models import Count
 | |
| 
 | |
|     Company.objects.annotate(
 | |
|         managers_required=(Count('num_employees') / 4) + Count('num_managers'))
 | |
| 
 | |
| The ``Aggregate`` API is as follows:
 | |
| 
 | |
| .. class:: Aggregate(*expressions, output_field=None, distinct=False, filter=None, **extra)
 | |
| 
 | |
|     .. attribute:: template
 | |
| 
 | |
|         A class attribute, as a format string, that describes the SQL that is
 | |
|         generated for this aggregate. Defaults to
 | |
|         ``'%(function)s(%(distinct)s%(expressions)s)'``.
 | |
| 
 | |
|     .. attribute:: function
 | |
| 
 | |
|         A class attribute describing the aggregate function that will be
 | |
|         generated. Specifically, the ``function`` will be interpolated as the
 | |
|         ``function`` placeholder within :attr:`template`. Defaults to ``None``.
 | |
| 
 | |
|     .. attribute:: window_compatible
 | |
| 
 | |
|         Defaults to ``True`` since most aggregate functions can be used as the
 | |
|         source expression in :class:`~django.db.models.expressions.Window`.
 | |
| 
 | |
|     .. attribute:: allow_distinct
 | |
| 
 | |
|         A class attribute determining whether or not this aggregate function
 | |
|         allows passing a ``distinct`` keyword argument. If set to ``False``
 | |
|         (default), ``TypeError`` is raised if ``distinct=True`` is passed.
 | |
| 
 | |
| The ``expressions`` positional arguments can include expressions or the names
 | |
| of model fields. They will be converted to a string and used as the
 | |
| ``expressions`` placeholder within the ``template``.
 | |
| 
 | |
| The ``output_field`` argument requires a model field instance, like
 | |
| ``IntegerField()`` or ``BooleanField()``, into which Django will load the value
 | |
| after it's retrieved from the database. Usually no arguments are needed when
 | |
| instantiating the model field as any arguments relating to data validation
 | |
| (``max_length``, ``max_digits``, etc.) will not be enforced on the expression's
 | |
| output value.
 | |
| 
 | |
| Note that ``output_field`` is only required when Django is unable to determine
 | |
| what field type the result should be. Complex expressions that mix field types
 | |
| should define the desired ``output_field``. For example, adding an
 | |
| ``IntegerField()`` and a ``FloatField()`` together should probably have
 | |
| ``output_field=FloatField()`` defined.
 | |
| 
 | |
| The ``distinct`` argument determines whether or not the aggregate function
 | |
| should be invoked for each distinct value of ``expressions`` (or set of
 | |
| values, for multiple ``expressions``). The argument is only supported on
 | |
| aggregates that have :attr:`~Aggregate.allow_distinct` set to ``True``.
 | |
| 
 | |
| The ``filter`` argument takes a :class:`Q object <django.db.models.Q>` that's
 | |
| used to filter the rows that are aggregated. See :ref:`conditional-aggregation`
 | |
| and :ref:`filtering-on-annotations` for example usage.
 | |
| 
 | |
| The ``**extra`` kwargs are ``key=value`` pairs that can be interpolated
 | |
| into the ``template`` attribute.
 | |
| 
 | |
| Creating your own Aggregate Functions
 | |
| -------------------------------------
 | |
| 
 | |
| You can create your own aggregate functions, too. At a minimum, you need to
 | |
| define ``function``, but you can also completely customize the SQL that is
 | |
| generated. Here's a brief example::
 | |
| 
 | |
|     from django.db.models import Aggregate
 | |
| 
 | |
|     class Sum(Aggregate):
 | |
|         # Supports SUM(ALL field).
 | |
|         function = 'SUM'
 | |
|         template = '%(function)s(%(all_values)s%(expressions)s)'
 | |
|         allow_distinct = False
 | |
| 
 | |
|         def __init__(self, expression, all_values=False, **extra):
 | |
|             super().__init__(
 | |
|                 expression,
 | |
|                 all_values='ALL ' if all_values else '',
 | |
|                 **extra
 | |
|             )
 | |
| 
 | |
| ``Value()`` expressions
 | |
| -----------------------
 | |
| 
 | |
| .. class:: Value(value, output_field=None)
 | |
| 
 | |
| 
 | |
| A ``Value()`` object represents the smallest possible component of an
 | |
| expression: a simple value. When you need to represent the value of an integer,
 | |
| boolean, or string within an expression, you can wrap that value within a
 | |
| ``Value()``.
 | |
| 
 | |
| You will rarely need to use ``Value()`` directly. When you write the expression
 | |
| ``F('field') + 1``, Django implicitly wraps the ``1`` in a ``Value()``,
 | |
| allowing simple values to be used in more complex expressions. You will need to
 | |
| use ``Value()`` when you want to pass a string to an expression. Most
 | |
| expressions interpret a string argument as the name of a field, like
 | |
| ``Lower('name')``.
 | |
| 
 | |
| The ``value`` argument describes the value to be included in the expression,
 | |
| such as ``1``, ``True``, or ``None``. Django knows how to convert these Python
 | |
| values into their corresponding database type.
 | |
| 
 | |
| The ``output_field`` argument should be a model field instance, like
 | |
| ``IntegerField()`` or ``BooleanField()``, into which Django will load the value
 | |
| after it's retrieved from the database. Usually no arguments are needed when
 | |
| instantiating the model field as any arguments relating to data validation
 | |
| (``max_length``, ``max_digits``, etc.) will not be enforced on the expression's
 | |
| output value.
 | |
| 
 | |
| ``ExpressionWrapper()`` expressions
 | |
| -----------------------------------
 | |
| 
 | |
| .. class:: ExpressionWrapper(expression, output_field)
 | |
| 
 | |
| ``ExpressionWrapper`` surrounds another expression and provides access to
 | |
| properties, such as ``output_field``, that may not be available on other
 | |
| expressions. ``ExpressionWrapper`` is necessary when using arithmetic on
 | |
| ``F()`` expressions with different types as described in
 | |
| :ref:`using-f-with-annotations`.
 | |
| 
 | |
| Conditional expressions
 | |
| -----------------------
 | |
| 
 | |
| Conditional expressions allow you to use :keyword:`if` ... :keyword:`elif` ...
 | |
| :keyword:`else` logic in queries. Django natively supports SQL ``CASE``
 | |
| expressions. For more details see :doc:`conditional-expressions`.
 | |
| 
 | |
| ``Subquery()`` expressions
 | |
| --------------------------
 | |
| 
 | |
| .. class:: Subquery(queryset, output_field=None)
 | |
| 
 | |
| You can add an explicit subquery to a ``QuerySet`` using the ``Subquery``
 | |
| expression.
 | |
| 
 | |
| For example, to annotate each post with the email address of the author of the
 | |
| newest comment on that post::
 | |
| 
 | |
|     >>> from django.db.models import OuterRef, Subquery
 | |
|     >>> newest = Comment.objects.filter(post=OuterRef('pk')).order_by('-created_at')
 | |
|     >>> Post.objects.annotate(newest_commenter_email=Subquery(newest.values('email')[:1]))
 | |
| 
 | |
| On PostgreSQL, the SQL looks like:
 | |
| 
 | |
| .. code-block:: sql
 | |
| 
 | |
|     SELECT "post"."id", (
 | |
|         SELECT U0."email"
 | |
|         FROM "comment" U0
 | |
|         WHERE U0."post_id" = ("post"."id")
 | |
|         ORDER BY U0."created_at" DESC LIMIT 1
 | |
|     ) AS "newest_commenter_email" FROM "post"
 | |
| 
 | |
| .. note::
 | |
| 
 | |
|     The examples in this section are designed to show how to force
 | |
|     Django to execute a subquery. In some cases it may be possible to
 | |
|     write an equivalent queryset that performs the same task more
 | |
|     clearly or efficiently.
 | |
| 
 | |
| Referencing columns from the outer queryset
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| .. class:: OuterRef(field)
 | |
| 
 | |
| Use ``OuterRef`` when a queryset in a ``Subquery`` needs to refer to a field
 | |
| from the outer query. It acts like an :class:`F` expression except that the
 | |
| check to see if it refers to a valid field isn't made until the outer queryset
 | |
| is resolved.
 | |
| 
 | |
| Instances of ``OuterRef`` may be used in conjunction with nested instances
 | |
| of ``Subquery`` to refer to a containing queryset that isn't the immediate
 | |
| parent. For example, this queryset would need to be within a nested pair of
 | |
| ``Subquery`` instances to resolve correctly::
 | |
| 
 | |
|     >>> Book.objects.filter(author=OuterRef(OuterRef('pk')))
 | |
| 
 | |
| Limiting a subquery to a single column
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| There are times when a single column must be returned from a ``Subquery``, for
 | |
| instance, to use a ``Subquery`` as the target of an ``__in`` lookup. To return
 | |
| all comments for posts published within the last day::
 | |
| 
 | |
|     >>> from datetime import timedelta
 | |
|     >>> from django.utils import timezone
 | |
|     >>> one_day_ago = timezone.now() - timedelta(days=1)
 | |
|     >>> posts = Post.objects.filter(published_at__gte=one_day_ago)
 | |
|     >>> Comment.objects.filter(post__in=Subquery(posts.values('pk')))
 | |
| 
 | |
| In this case, the subquery must use :meth:`~.QuerySet.values`
 | |
| to return only a single column: the primary key of the post.
 | |
| 
 | |
| Limiting the subquery to a single row
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| To prevent a subquery from returning multiple rows, a slice (``[:1]``) of the
 | |
| queryset is used::
 | |
| 
 | |
|     >>> subquery = Subquery(newest.values('email')[:1])
 | |
|     >>> Post.objects.annotate(newest_commenter_email=subquery)
 | |
| 
 | |
| In this case, the subquery must only return a single column *and* a single
 | |
| row: the email address of the most recently created comment.
 | |
| 
 | |
| (Using :meth:`~.QuerySet.get` instead of a slice would fail because the
 | |
| ``OuterRef`` cannot be resolved until the queryset is used within a
 | |
| ``Subquery``.)
 | |
| 
 | |
| ``Exists()`` subqueries
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| .. class:: Exists(queryset)
 | |
| 
 | |
| ``Exists`` is a ``Subquery`` subclass that uses an SQL ``EXISTS`` statement. In
 | |
| many cases it will perform better than a subquery since the database is able to
 | |
| stop evaluation of the subquery when a first matching row is found.
 | |
| 
 | |
| For example, to annotate each post with whether or not it has a comment from
 | |
| within the last day::
 | |
| 
 | |
|     >>> from django.db.models import Exists, OuterRef
 | |
|     >>> from datetime import timedelta
 | |
|     >>> from django.utils import timezone
 | |
|     >>> one_day_ago = timezone.now() - timedelta(days=1)
 | |
|     >>> recent_comments = Comment.objects.filter(
 | |
|     ...     post=OuterRef('pk'),
 | |
|     ...     created_at__gte=one_day_ago,
 | |
|     ... )
 | |
|     >>> Post.objects.annotate(recent_comment=Exists(recent_comments))
 | |
| 
 | |
| On PostgreSQL, the SQL looks like:
 | |
| 
 | |
| .. code-block:: sql
 | |
| 
 | |
|     SELECT "post"."id", "post"."published_at", EXISTS(
 | |
|         SELECT U0."id", U0."post_id", U0."email", U0."created_at"
 | |
|         FROM "comment" U0
 | |
|         WHERE (
 | |
|             U0."created_at" >= YYYY-MM-DD HH:MM:SS AND
 | |
|             U0."post_id" = ("post"."id")
 | |
|         )
 | |
|     ) AS "recent_comment" FROM "post"
 | |
| 
 | |
| It's unnecessary to force ``Exists`` to refer to a single column, since the
 | |
| columns are discarded and a boolean result is returned. Similarly, since
 | |
| ordering is unimportant within an SQL ``EXISTS`` subquery and would only
 | |
| degrade performance, it's automatically removed.
 | |
| 
 | |
| You can query using ``NOT EXISTS`` with ``~Exists()``.
 | |
| 
 | |
| Filtering on a ``Subquery()`` or ``Exists()`` expressions
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| ``Subquery()`` that returns a boolean value and ``Exists()`` may be used as a
 | |
| ``condition`` in :class:`~django.db.models.expressions.When` expressions, or to
 | |
| directly filter a queryset::
 | |
| 
 | |
|     >>> recent_comments = Comment.objects.filter(...)  # From above
 | |
|     >>> Post.objects.filter(Exists(recent_comments))
 | |
| 
 | |
| This will ensure that the subquery will not be added to the ``SELECT`` columns,
 | |
| which may result in a better performance.
 | |
| 
 | |
| .. versionchanged:: 3.0
 | |
| 
 | |
|     In previous versions of Django, it was necessary to first annotate and then
 | |
|     filter against the annotation. This resulted in the annotated value always
 | |
|     being present in the query result, and often resulted in a query that took
 | |
|     more time to execute.
 | |
| 
 | |
| Using aggregates within a ``Subquery`` expression
 | |
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| Aggregates may be used within a ``Subquery``, but they require a specific
 | |
| combination of :meth:`~.QuerySet.filter`, :meth:`~.QuerySet.values`, and
 | |
| :meth:`~.QuerySet.annotate` to get the subquery grouping correct.
 | |
| 
 | |
| Assuming both models have a ``length`` field, to find posts where the post
 | |
| length is greater than the total length of all combined comments::
 | |
| 
 | |
|     >>> from django.db.models import OuterRef, Subquery, Sum
 | |
|     >>> comments = Comment.objects.filter(post=OuterRef('pk')).order_by().values('post')
 | |
|     >>> total_comments = comments.annotate(total=Sum('length')).values('total')
 | |
|     >>> Post.objects.filter(length__gt=Subquery(total_comments))
 | |
| 
 | |
| The initial ``filter(...)`` limits the subquery to the relevant parameters.
 | |
| ``order_by()`` removes the default :attr:`~django.db.models.Options.ordering`
 | |
| (if any) on the ``Comment`` model. ``values('post')`` aggregates comments by
 | |
| ``Post``. Finally, ``annotate(...)`` performs the aggregation. The order in
 | |
| which these queryset methods are applied is important. In this case, since the
 | |
| subquery must be limited to a single column, ``values('total')`` is required.
 | |
| 
 | |
| This is the only way to perform an aggregation within a ``Subquery``, as
 | |
| using :meth:`~.QuerySet.aggregate` attempts to evaluate the queryset (and if
 | |
| there is an ``OuterRef``, this will not be possible to resolve).
 | |
| 
 | |
| Raw SQL expressions
 | |
| -------------------
 | |
| 
 | |
| .. currentmodule:: django.db.models.expressions
 | |
| 
 | |
| .. class:: RawSQL(sql, params, output_field=None)
 | |
| 
 | |
| Sometimes database expressions can't easily express a complex ``WHERE`` clause.
 | |
| In these edge cases, use the ``RawSQL`` expression. For example::
 | |
| 
 | |
|     >>> from django.db.models.expressions import RawSQL
 | |
|     >>> queryset.annotate(val=RawSQL("select col from sometable where othercol = %s", (someparam,)))
 | |
| 
 | |
| These extra lookups may not be portable to different database engines (because
 | |
| you're explicitly writing SQL code) and violate the DRY principle, so you
 | |
| should avoid them if possible.
 | |
| 
 | |
| .. warning::
 | |
| 
 | |
|     To protect against `SQL injection attacks
 | |
|     <https://en.wikipedia.org/wiki/SQL_injection>`_, you must escape any
 | |
|     parameters that the user can control by using ``params``. ``params`` is a
 | |
|     required argument to force you to acknowledge that you're not interpolating
 | |
|     your SQL with user-provided data.
 | |
| 
 | |
|     You also must not quote placeholders in the SQL string. This example is
 | |
|     vulnerable to SQL injection because of the quotes around ``%s``::
 | |
| 
 | |
|         RawSQL("select col from sometable where othercol = '%s'")  # unsafe!
 | |
| 
 | |
|     You can read more about how Django's :ref:`SQL injection protection
 | |
|     <sql-injection-protection>` works.
 | |
| 
 | |
| Window functions
 | |
| ----------------
 | |
| 
 | |
| Window functions provide a way to apply functions on partitions. Unlike a
 | |
| normal aggregation function which computes a final result for each set defined
 | |
| by the group by, window functions operate on :ref:`frames <window-frames>` and
 | |
| partitions, and compute the result for each row.
 | |
| 
 | |
| You can specify multiple windows in the same query which in Django ORM would be
 | |
| equivalent to including multiple expressions in a :doc:`QuerySet.annotate()
 | |
| </topics/db/aggregation>` call. The ORM doesn't make use of named windows,
 | |
| instead they are part of the selected columns.
 | |
| 
 | |
| .. class:: Window(expression, partition_by=None, order_by=None, frame=None, output_field=None)
 | |
| 
 | |
|     .. attribute:: filterable
 | |
| 
 | |
|         Defaults to ``False``. The SQL standard disallows referencing window
 | |
|         functions in the ``WHERE`` clause and Django raises an exception when
 | |
|         constructing a ``QuerySet`` that would do that.
 | |
| 
 | |
|     .. attribute:: template
 | |
| 
 | |
|         Defaults to ``%(expression)s OVER (%(window)s)'``. If only the
 | |
|         ``expression`` argument is provided, the window clause will be blank.
 | |
| 
 | |
| The ``Window`` class is the main expression for an ``OVER`` clause.
 | |
| 
 | |
| The ``expression`` argument is either a :ref:`window function
 | |
| <window-functions>`, an :ref:`aggregate function <aggregation-functions>`, or
 | |
| an expression that's compatible in a window clause.
 | |
| 
 | |
| The ``partition_by`` argument is a list of expressions (column names should be
 | |
| wrapped in an ``F``-object) that control the partitioning of the rows.
 | |
| Partitioning narrows which rows are used to compute the result set.
 | |
| 
 | |
| The ``output_field`` is specified either as an argument or by the expression.
 | |
| 
 | |
| The ``order_by`` argument accepts a sequence of expressions on which you can
 | |
| call :meth:`~django.db.models.Expression.asc` and
 | |
| :meth:`~django.db.models.Expression.desc`. The ordering controls the order in
 | |
| which the expression is applied. For example, if you sum over the rows in a
 | |
| partition, the first result is the value of the first row, the second is the
 | |
| sum of first and second row.
 | |
| 
 | |
| The ``frame`` parameter specifies which other rows that should be used in the
 | |
| computation. See :ref:`window-frames` for details.
 | |
| 
 | |
| For example, to annotate each movie with the average rating for the movies by
 | |
| the same studio in the same genre and release year::
 | |
| 
 | |
|     >>> from django.db.models import Avg, F, Window
 | |
|     >>> from django.db.models.functions import ExtractYear
 | |
|     >>> Movie.objects.annotate(
 | |
|     >>>     avg_rating=Window(
 | |
|     >>>         expression=Avg('rating'),
 | |
|     >>>         partition_by=[F('studio'), F('genre')],
 | |
|     >>>         order_by=ExtractYear('released').asc(),
 | |
|     >>>     ),
 | |
|     >>> )
 | |
| 
 | |
| This allows you to check if a movie is rated better or worse than its peers.
 | |
| 
 | |
| You may want to apply multiple expressions over the same window, i.e., the
 | |
| same partition and frame. For example, you could modify the previous example
 | |
| to also include the best and worst rating in each movie's group (same studio,
 | |
| genre, and release year) by using three window functions in the same query. The
 | |
| partition and ordering from the previous example is extracted into a dictionary
 | |
| to reduce repetition::
 | |
| 
 | |
|     >>> from django.db.models import Avg, F, Max, Min, Window
 | |
|     >>> from django.db.models.functions import ExtractYear
 | |
|     >>> window = {
 | |
|     >>>    'partition_by': [F('studio'), F('genre')],
 | |
|     >>>    'order_by': ExtractYear('released').asc(),
 | |
|     >>> }
 | |
|     >>> Movie.objects.annotate(
 | |
|     >>>     avg_rating=Window(
 | |
|     >>>         expression=Avg('rating'), **window,
 | |
|     >>>     ),
 | |
|     >>>     best=Window(
 | |
|     >>>         expression=Max('rating'), **window,
 | |
|     >>>     ),
 | |
|     >>>     worst=Window(
 | |
|     >>>         expression=Min('rating'), **window,
 | |
|     >>>     ),
 | |
|     >>> )
 | |
| 
 | |
| Among Django's built-in database backends, MySQL 8.0.2+, PostgreSQL, and Oracle
 | |
| support window expressions. Support for different window expression features
 | |
| varies among the different databases. For example, the options in
 | |
| :meth:`~django.db.models.Expression.asc` and
 | |
| :meth:`~django.db.models.Expression.desc` may not be supported. Consult the
 | |
| documentation for your database as needed.
 | |
| 
 | |
| .. _window-frames:
 | |
| 
 | |
| Frames
 | |
| ~~~~~~
 | |
| 
 | |
| For a window frame, you can choose either a range-based sequence of rows or an
 | |
| ordinary sequence of rows.
 | |
| 
 | |
| .. class:: ValueRange(start=None, end=None)
 | |
| 
 | |
|     .. attribute:: frame_type
 | |
| 
 | |
|         This attribute is set to ``'RANGE'``.
 | |
| 
 | |
|     PostgreSQL has limited support for ``ValueRange`` and only supports use of
 | |
|     the standard start and end points, such as ``CURRENT ROW`` and ``UNBOUNDED
 | |
|     FOLLOWING``.
 | |
| 
 | |
| .. class:: RowRange(start=None, end=None)
 | |
| 
 | |
|     .. attribute:: frame_type
 | |
| 
 | |
|         This attribute is set to ``'ROWS'``.
 | |
| 
 | |
| Both classes return SQL with the template::
 | |
| 
 | |
|     %(frame_type)s BETWEEN %(start)s AND %(end)s
 | |
| 
 | |
| Frames narrow the rows that are used for computing the result. They shift from
 | |
| some start point to some specified end point. Frames can be used with and
 | |
| without partitions, but it's often a good idea to specify an ordering of the
 | |
| window to ensure a deterministic result. In a frame, a peer in a frame is a row
 | |
| with an equivalent value, or all rows if an ordering clause isn't present.
 | |
| 
 | |
| The default starting point for a frame is ``UNBOUNDED PRECEDING`` which is the
 | |
| first row of the partition. The end point is always explicitly included in the
 | |
| SQL generated by the ORM and is by default ``UNBOUNDED FOLLOWING``. The default
 | |
| frame includes all rows from the partition to the last row in the set.
 | |
| 
 | |
| The accepted values for the ``start`` and ``end`` arguments are ``None``, an
 | |
| integer, or zero. A negative integer for ``start`` results in ``N preceding``,
 | |
| while ``None`` yields ``UNBOUNDED PRECEDING``. For both ``start`` and ``end``,
 | |
| zero will return ``CURRENT ROW``. Positive integers are accepted for ``end``.
 | |
| 
 | |
| There's a difference in what ``CURRENT ROW`` includes. When specified in
 | |
| ``ROWS`` mode, the frame starts or ends with the current row. When specified in
 | |
| ``RANGE`` mode, the frame starts or ends at the first or last peer according to
 | |
| the ordering clause. Thus, ``RANGE CURRENT ROW`` evaluates the expression for
 | |
| rows which have the same value specified by the ordering. Because the template
 | |
| includes both the ``start`` and ``end`` points, this may be expressed with::
 | |
| 
 | |
|     ValueRange(start=0, end=0)
 | |
| 
 | |
| If a movie's "peers" are described as movies released by the same studio in the
 | |
| same genre in the same year, this ``RowRange`` example annotates each movie
 | |
| with the average rating of a movie's two prior and two following peers::
 | |
| 
 | |
|     >>> from django.db.models import Avg, F, RowRange, Window
 | |
|     >>> from django.db.models.functions import ExtractYear
 | |
|     >>> Movie.objects.annotate(
 | |
|     >>>     avg_rating=Window(
 | |
|     >>>         expression=Avg('rating'),
 | |
|     >>>         partition_by=[F('studio'), F('genre')],
 | |
|     >>>         order_by=ExtractYear('released').asc(),
 | |
|     >>>         frame=RowRange(start=-2, end=2),
 | |
|     >>>     ),
 | |
|     >>> )
 | |
| 
 | |
| If the database supports it, you can specify the start and end points based on
 | |
| values of an expression in the partition. If the ``released`` field of the
 | |
| ``Movie`` model stores the release month of each movies, this ``ValueRange``
 | |
| example annotates each movie with the average rating of a movie's peers
 | |
| released between twelve months before and twelve months after the each movie.
 | |
| 
 | |
|     >>> from django.db.models import Avg, ExpressionList, F, ValueRange, Window
 | |
|     >>> Movie.objects.annotate(
 | |
|     >>>     avg_rating=Window(
 | |
|     >>>         expression=Avg('rating'),
 | |
|     >>>         partition_by=[F('studio'), F('genre')],
 | |
|     >>>         order_by=F('released').asc(),
 | |
|     >>>         frame=ValueRange(start=-12, end=12),
 | |
|     >>>     ),
 | |
|     >>> )
 | |
| 
 | |
| .. currentmodule:: django.db.models
 | |
| 
 | |
| Technical Information
 | |
| =====================
 | |
| 
 | |
| Below you'll find technical implementation details that may be useful to
 | |
| library authors. The technical API and examples below will help with
 | |
| creating generic query expressions that can extend the built-in functionality
 | |
| that Django provides.
 | |
| 
 | |
| Expression API
 | |
| --------------
 | |
| 
 | |
| Query expressions implement the :ref:`query expression API <query-expression>`,
 | |
| but also expose a number of extra methods and attributes listed below. All
 | |
| query expressions must inherit from ``Expression()`` or a relevant
 | |
| subclass.
 | |
| 
 | |
| When a query expression wraps another expression, it is responsible for
 | |
| calling the appropriate methods on the wrapped expression.
 | |
| 
 | |
| .. class:: Expression
 | |
| 
 | |
|     .. attribute:: contains_aggregate
 | |
| 
 | |
|         Tells Django that this expression contains an aggregate and that a
 | |
|         ``GROUP BY`` clause needs to be added to the query.
 | |
| 
 | |
|     .. attribute:: contains_over_clause
 | |
| 
 | |
|         Tells Django that this expression contains a
 | |
|         :class:`~django.db.models.expressions.Window` expression. It's used,
 | |
|         for example, to disallow window function expressions in queries that
 | |
|         modify data.
 | |
| 
 | |
|     .. attribute:: filterable
 | |
| 
 | |
|         Tells Django that this expression can be referenced in
 | |
|         :meth:`.QuerySet.filter`. Defaults to ``True``.
 | |
| 
 | |
|     .. attribute:: window_compatible
 | |
| 
 | |
|         Tells Django that this expression can be used as the source expression
 | |
|         in :class:`~django.db.models.expressions.Window`. Defaults to
 | |
|         ``False``.
 | |
| 
 | |
|     .. method:: resolve_expression(query=None, allow_joins=True, reuse=None, summarize=False, for_save=False)
 | |
| 
 | |
|         Provides the chance to do any pre-processing or validation of
 | |
|         the expression before it's added to the query. ``resolve_expression()``
 | |
|         must also be called on any nested expressions. A ``copy()`` of ``self``
 | |
|         should be returned with any necessary transformations.
 | |
| 
 | |
|         ``query`` is the backend query implementation.
 | |
| 
 | |
|         ``allow_joins`` is a boolean that allows or denies the use of
 | |
|         joins in the query.
 | |
| 
 | |
|         ``reuse`` is a set of reusable joins for multi-join scenarios.
 | |
| 
 | |
|         ``summarize`` is a boolean that, when ``True``, signals that the
 | |
|         query being computed is a terminal aggregate query.
 | |
| 
 | |
|         ``for_save`` is a boolean that, when ``True``, signals that the query
 | |
|         being executed is performing a create or update.
 | |
| 
 | |
|     .. method:: get_source_expressions()
 | |
| 
 | |
|         Returns an ordered list of inner expressions. For example::
 | |
| 
 | |
|           >>> Sum(F('foo')).get_source_expressions()
 | |
|           [F('foo')]
 | |
| 
 | |
|     .. method:: set_source_expressions(expressions)
 | |
| 
 | |
|         Takes a list of expressions and stores them such that
 | |
|         ``get_source_expressions()`` can return them.
 | |
| 
 | |
|     .. method:: relabeled_clone(change_map)
 | |
| 
 | |
|         Returns a clone (copy) of ``self``, with any column aliases relabeled.
 | |
|         Column aliases are renamed when subqueries are created.
 | |
|         ``relabeled_clone()`` should also be called on any nested expressions
 | |
|         and assigned to the clone.
 | |
| 
 | |
|         ``change_map`` is a dictionary mapping old aliases to new aliases.
 | |
| 
 | |
|         Example::
 | |
| 
 | |
|           def relabeled_clone(self, change_map):
 | |
|               clone = copy.copy(self)
 | |
|               clone.expression = self.expression.relabeled_clone(change_map)
 | |
|               return clone
 | |
| 
 | |
|     .. method:: convert_value(value, expression, connection)
 | |
| 
 | |
|         A hook allowing the expression to coerce ``value`` into a more
 | |
|         appropriate type.
 | |
| 
 | |
|     .. method:: get_group_by_cols(alias=None)
 | |
| 
 | |
|         Responsible for returning the list of columns references by
 | |
|         this expression. ``get_group_by_cols()`` should be called on any
 | |
|         nested expressions. ``F()`` objects, in particular, hold a reference
 | |
|         to a column. The ``alias`` parameter will be ``None`` unless the
 | |
|         expression has been annotated and is used for grouping.
 | |
| 
 | |
|         .. versionchanged:: 3.0
 | |
| 
 | |
|             The ``alias`` parameter was added.
 | |
| 
 | |
|     .. method:: asc(nulls_first=False, nulls_last=False)
 | |
| 
 | |
|         Returns the expression ready to be sorted in ascending order.
 | |
| 
 | |
|         ``nulls_first`` and ``nulls_last`` define how null values are sorted.
 | |
|         See :ref:`using-f-to-sort-null-values` for example usage.
 | |
| 
 | |
|     .. method:: desc(nulls_first=False, nulls_last=False)
 | |
| 
 | |
|         Returns the expression ready to be sorted in descending order.
 | |
| 
 | |
|         ``nulls_first`` and ``nulls_last`` define how null values are sorted.
 | |
|         See :ref:`using-f-to-sort-null-values` for example usage.
 | |
| 
 | |
|     .. method:: reverse_ordering()
 | |
| 
 | |
|         Returns ``self`` with any modifications required to reverse the sort
 | |
|         order within an ``order_by`` call. As an example, an expression
 | |
|         implementing ``NULLS LAST`` would change its value to be
 | |
|         ``NULLS FIRST``. Modifications are only required for expressions that
 | |
|         implement sort order like ``OrderBy``. This method is called when
 | |
|         :meth:`~django.db.models.query.QuerySet.reverse()` is called on a
 | |
|         queryset.
 | |
| 
 | |
| Writing your own Query Expressions
 | |
| ----------------------------------
 | |
| 
 | |
| You can write your own query expression classes that use, and can integrate
 | |
| with, other query expressions. Let's step through an example by writing an
 | |
| implementation of the ``COALESCE`` SQL function, without using the built-in
 | |
| :ref:`Func() expressions <func-expressions>`.
 | |
| 
 | |
| The ``COALESCE`` SQL function is defined as taking a list of columns or
 | |
| values. It will return the first column or value that isn't ``NULL``.
 | |
| 
 | |
| We'll start by defining the template to be used for SQL generation and
 | |
| an ``__init__()`` method to set some attributes::
 | |
| 
 | |
|   import copy
 | |
|   from django.db.models import Expression
 | |
| 
 | |
|   class Coalesce(Expression):
 | |
|       template = 'COALESCE( %(expressions)s )'
 | |
| 
 | |
|       def __init__(self, expressions, output_field):
 | |
|         super().__init__(output_field=output_field)
 | |
|         if len(expressions) < 2:
 | |
|             raise ValueError('expressions must have at least 2 elements')
 | |
|         for expression in expressions:
 | |
|             if not hasattr(expression, 'resolve_expression'):
 | |
|                 raise TypeError('%r is not an Expression' % expression)
 | |
|         self.expressions = expressions
 | |
| 
 | |
| We do some basic validation on the parameters, including requiring at least
 | |
| 2 columns or values, and ensuring they are expressions. We are requiring
 | |
| ``output_field`` here so that Django knows what kind of model field to assign
 | |
| the eventual result to.
 | |
| 
 | |
| Now we implement the pre-processing and validation. Since we do not have
 | |
| any of our own validation at this point, we delegate to the nested
 | |
| expressions::
 | |
| 
 | |
|     def resolve_expression(self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False):
 | |
|         c = self.copy()
 | |
|         c.is_summary = summarize
 | |
|         for pos, expression in enumerate(self.expressions):
 | |
|             c.expressions[pos] = expression.resolve_expression(query, allow_joins, reuse, summarize, for_save)
 | |
|         return c
 | |
| 
 | |
| Next, we write the method responsible for generating the SQL::
 | |
| 
 | |
|     def as_sql(self, compiler, connection, template=None):
 | |
|         sql_expressions, sql_params = [], []
 | |
|         for expression in self.expressions:
 | |
|             sql, params = compiler.compile(expression)
 | |
|             sql_expressions.append(sql)
 | |
|             sql_params.extend(params)
 | |
|         template = template or self.template
 | |
|         data = {'expressions': ','.join(sql_expressions)}
 | |
|         return template % data, sql_params
 | |
| 
 | |
|     def as_oracle(self, compiler, connection):
 | |
|         """
 | |
|         Example of vendor specific handling (Oracle in this case).
 | |
|         Let's make the function name lowercase.
 | |
|         """
 | |
|         return self.as_sql(compiler, connection, template='coalesce( %(expressions)s )')
 | |
| 
 | |
| ``as_sql()`` methods can support custom keyword arguments, allowing
 | |
| ``as_vendorname()`` methods to override data used to generate the SQL string.
 | |
| Using ``as_sql()`` keyword arguments for customization is preferable to
 | |
| mutating ``self`` within ``as_vendorname()`` methods as the latter can lead to
 | |
| errors when running on different database backends. If your class relies on
 | |
| class attributes to define data, consider allowing overrides in your
 | |
| ``as_sql()`` method.
 | |
| 
 | |
| We generate the SQL for each of the ``expressions`` by using the
 | |
| ``compiler.compile()`` method, and join the result together with commas.
 | |
| Then the template is filled out with our data and the SQL and parameters
 | |
| are returned.
 | |
| 
 | |
| We've also defined a custom implementation that is specific to the Oracle
 | |
| backend. The ``as_oracle()`` function will be called instead of ``as_sql()``
 | |
| if the Oracle backend is in use.
 | |
| 
 | |
| Finally, we implement the rest of the methods that allow our query expression
 | |
| to play nice with other query expressions::
 | |
| 
 | |
|     def get_source_expressions(self):
 | |
|         return self.expressions
 | |
| 
 | |
|     def set_source_expressions(self, expressions):
 | |
|         self.expressions = expressions
 | |
| 
 | |
| Let's see how it works::
 | |
| 
 | |
|     >>> from django.db.models import F, Value, CharField
 | |
|     >>> qs = Company.objects.annotate(
 | |
|     ...    tagline=Coalesce([
 | |
|     ...        F('motto'),
 | |
|     ...        F('ticker_name'),
 | |
|     ...        F('description'),
 | |
|     ...        Value('No Tagline')
 | |
|     ...        ], output_field=CharField()))
 | |
|     >>> for c in qs:
 | |
|     ...     print("%s: %s" % (c.name, c.tagline))
 | |
|     ...
 | |
|     Google: Do No Evil
 | |
|     Apple: AAPL
 | |
|     Yahoo: Internet Company
 | |
|     Django Software Foundation: No Tagline
 | |
| 
 | |
| .. _avoiding-sql-injection-in-query-expressions:
 | |
| 
 | |
| Avoiding SQL injection
 | |
| ~~~~~~~~~~~~~~~~~~~~~~
 | |
| 
 | |
| Since a ``Func``'s keyword arguments for ``__init__()``  (``**extra``) and
 | |
| ``as_sql()`` (``**extra_context``) are interpolated into the SQL string rather
 | |
| than passed as query parameters (where the database driver would escape them),
 | |
| they must not contain untrusted user input.
 | |
| 
 | |
| For example, if ``substring`` is user-provided, this function is vulnerable to
 | |
| SQL injection::
 | |
| 
 | |
|     from django.db.models import Func
 | |
| 
 | |
|     class Position(Func):
 | |
|         function = 'POSITION'
 | |
|         template = "%(function)s('%(substring)s' in %(expressions)s)"
 | |
| 
 | |
|         def __init__(self, expression, substring):
 | |
|             # substring=substring is a SQL injection vulnerability!
 | |
|             super().__init__(expression, substring=substring)
 | |
| 
 | |
| This function generates a SQL string without any parameters. Since ``substring``
 | |
| is passed to ``super().__init__()`` as a keyword argument, it's interpolated
 | |
| into the SQL string before the query is sent to the database.
 | |
| 
 | |
| Here's a corrected rewrite::
 | |
| 
 | |
|     class Position(Func):
 | |
|         function = 'POSITION'
 | |
|         arg_joiner = ' IN '
 | |
| 
 | |
|         def __init__(self, expression, substring):
 | |
|             super().__init__(substring, expression)
 | |
| 
 | |
| With ``substring`` instead passed as a positional argument, it'll be passed as
 | |
| a parameter in the database query.
 | |
| 
 | |
| Adding support in third-party database backends
 | |
| -----------------------------------------------
 | |
| 
 | |
| If you're using a database backend that uses a different SQL syntax for a
 | |
| certain function, you can add support for it by monkey patching a new method
 | |
| onto the function's class.
 | |
| 
 | |
| Let's say we're writing a backend for Microsoft's SQL Server which uses the SQL
 | |
| ``LEN`` instead of ``LENGTH`` for the :class:`~functions.Length` function.
 | |
| We'll monkey patch a new method called ``as_sqlserver()`` onto the ``Length``
 | |
| class::
 | |
| 
 | |
|     from django.db.models.functions import Length
 | |
| 
 | |
|     def sqlserver_length(self, compiler, connection):
 | |
|         return self.as_sql(compiler, connection, function='LEN')
 | |
| 
 | |
|     Length.as_sqlserver = sqlserver_length
 | |
| 
 | |
| You can also customize the SQL using the ``template`` parameter of ``as_sql()``.
 | |
| 
 | |
| We use ``as_sqlserver()`` because ``django.db.connection.vendor`` returns
 | |
| ``sqlserver`` for the backend.
 | |
| 
 | |
| Third-party backends can register their functions in the top level
 | |
| ``__init__.py`` file of the backend package or in a top level ``expressions.py``
 | |
| file (or package) that is imported from the top level ``__init__.py``.
 | |
| 
 | |
| For user projects wishing to patch the backend that they're using, this code
 | |
| should live in an :meth:`AppConfig.ready()<django.apps.AppConfig.ready>` method.
 |