diff --git a/django/db/backends/base/features.py b/django/db/backends/base/features.py
index 4639f1bb2e..e042260810 100644
--- a/django/db/backends/base/features.py
+++ b/django/db/backends/base/features.py
@@ -302,6 +302,13 @@ class BaseDatabaseFeatures:
     # {'d': [{'f': 'g'}]}?
     json_key_contains_list_matching_requires_list = False
 
+    # Collation names for use by the Django test suite.
+    test_collations = {
+        'ci': None,  # Case-insensitive.
+        'cs': None,  # Case-sensitive.
+        'swedish-ci': None,  # Swedish case-insensitive.
+    }
+
     def __init__(self, connection):
         self.connection = connection
 
diff --git a/django/db/backends/mysql/features.py b/django/db/backends/mysql/features.py
index e55d098057..b0bf4fd763 100644
--- a/django/db/backends/mysql/features.py
+++ b/django/db/backends/mysql/features.py
@@ -44,6 +44,10 @@ class DatabaseFeatures(BaseDatabaseFeatures):
     supports_partial_indexes = False
     supports_order_by_nulls_modifier = False
     order_by_nulls_first = True
+    test_collations = {
+        'ci': 'utf8_general_ci',
+        'swedish-ci': 'utf8_swedish_ci',
+    }
 
     @cached_property
     def _mysql_storage_engine(self):
diff --git a/django/db/backends/oracle/features.py b/django/db/backends/oracle/features.py
index e78431cc56..88739f5221 100644
--- a/django/db/backends/oracle/features.py
+++ b/django/db/backends/oracle/features.py
@@ -61,6 +61,11 @@ class DatabaseFeatures(BaseDatabaseFeatures):
     supports_boolean_expr_in_select_clause = False
     supports_primitives_in_json_field = False
     supports_json_field_contains = False
+    test_collations = {
+        'ci': 'BINARY_CI',
+        'cs': 'BINARY',
+        'swedish-ci': 'SWEDISH_CI',
+    }
 
     @cached_property
     def introspected_field_types(self):
diff --git a/django/db/backends/postgresql/features.py b/django/db/backends/postgresql/features.py
index df16691444..67ccd14690 100644
--- a/django/db/backends/postgresql/features.py
+++ b/django/db/backends/postgresql/features.py
@@ -58,6 +58,9 @@ class DatabaseFeatures(BaseDatabaseFeatures):
     supports_deferrable_unique_constraints = True
     has_json_operators = True
     json_key_contains_list_matching_requires_list = True
+    test_collations = {
+        'swedish-ci': 'sv-x-icu',
+    }
 
     @cached_property
     def introspected_field_types(self):
diff --git a/django/db/backends/sqlite3/features.py b/django/db/backends/sqlite3/features.py
index 555d1f6a84..e879e049f9 100644
--- a/django/db/backends/sqlite3/features.py
+++ b/django/db/backends/sqlite3/features.py
@@ -44,6 +44,10 @@ class DatabaseFeatures(BaseDatabaseFeatures):
     supports_order_by_nulls_modifier = Database.sqlite_version_info >= (3, 30, 0)
     order_by_nulls_first = True
     supports_json_field_contains = False
+    test_collations = {
+        'ci': 'nocase',
+        'cs': 'binary',
+    }
 
     @cached_property
     def supports_atomic_references_rename(self):
diff --git a/django/db/models/functions/__init__.py b/django/db/models/functions/__init__.py
index fe67439e08..f6cb4eccf3 100644
--- a/django/db/models/functions/__init__.py
+++ b/django/db/models/functions/__init__.py
@@ -1,4 +1,4 @@
-from .comparison import Cast, Coalesce, Greatest, Least, NullIf
+from .comparison import Cast, Coalesce, Collate, Greatest, Least, NullIf
 from .datetime import (
     Extract, ExtractDay, ExtractHour, ExtractIsoWeekDay, ExtractIsoYear,
     ExtractMinute, ExtractMonth, ExtractQuarter, ExtractSecond, ExtractWeek,
@@ -22,7 +22,7 @@ from .window import (
 
 __all__ = [
     # comparison and conversion
-    'Cast', 'Coalesce', 'Greatest', 'Least', 'NullIf',
+    'Cast', 'Coalesce', 'Collate', 'Greatest', 'Least', 'NullIf',
     # datetime
     'Extract', 'ExtractDay', 'ExtractHour', 'ExtractMinute', 'ExtractMonth',
     'ExtractQuarter', 'ExtractSecond', 'ExtractWeek', 'ExtractIsoWeekDay',
diff --git a/django/db/models/functions/comparison.py b/django/db/models/functions/comparison.py
index 6dc235bffb..c1b7754610 100644
--- a/django/db/models/functions/comparison.py
+++ b/django/db/models/functions/comparison.py
@@ -1,5 +1,6 @@
 """Database functions that do comparisons or type conversions."""
 from django.db.models.expressions import Func, Value
+from django.utils.regex_helper import _lazy_re_compile
 
 
 class Cast(Func):
@@ -74,6 +75,31 @@ class Coalesce(Func):
         return self.as_sql(compiler, connection, **extra_context)
 
 
+class Collate(Func):
+    """
+    Apply the named database collation to an expression.
+
+    Raise ValueError if the collation name is empty or contains characters
+    other than word characters and hyphens.
+    """
+    function = 'COLLATE'
+    template = '%(expressions)s %(function)s %(collation)s'
+    # Inspired from https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
+    # Anchor with \Z rather than $: $ also matches just before a trailing
+    # newline, which would let a name such as 'nocase\n' through validation.
+    collation_re = _lazy_re_compile(r'^[\w\-]+\Z')
+
+    def __init__(self, expression, collation):
+        if not (collation and self.collation_re.match(collation)):
+            raise ValueError('Invalid collation name: %r.' % collation)
+        self.collation = collation
+        super().__init__(expression)
+
+    def as_sql(self, compiler, connection, **extra_context):
+        extra_context.setdefault('collation', connection.ops.quote_name(self.collation))
+        return super().as_sql(compiler, connection, **extra_context)
+
+
 class Greatest(Func):
     """
     Return the maximum expression.
diff --git a/docs/ref/models/database-functions.txt b/docs/ref/models/database-functions.txt
index da67d9e362..f5efdb7e87 100644
--- a/docs/ref/models/database-functions.txt
+++ b/docs/ref/models/database-functions.txt
@@ -87,6 +87,25 @@ Usage examples::
     >>> now = timezone.now()
     >>> Coalesce('updated', Cast(now, DateTimeField()))
 
+``Collate``
+-----------
+
+.. class:: Collate(expression, collation)
+
+.. versionadded:: 3.2
+
+Takes an expression and a collation name to query against.
+
+For example, to filter case-insensitively in SQLite::
+
+    >>> Author.objects.filter(name=Collate(Value('john'), 'nocase'))
+    <QuerySet [<Author: John>, <Author: john>]>
+
+It can also be used when ordering, for example with PostgreSQL::
+
+    >>> Author.objects.order_by(Collate('name', 'et-x-icu'))
+    <QuerySet [<Author: Ursula>, <Author: Veronika>, <Author: Ülle>]>
+
 ``Greatest``
 ------------
 
diff --git a/docs/releases/3.2.txt b/docs/releases/3.2.txt
index 23b687f593..41929172c6 100644
--- a/docs/releases/3.2.txt
+++ b/docs/releases/3.2.txt
@@ -273,6 +273,9 @@ Models
   expressions that don't need to be selected but are used for filtering,
   ordering, or as a part of complex expressions.
 
+* The new :class:`~django.db.models.functions.Collate` function allows
+  filtering and ordering by specified database collations.
+
 Pagination
 ~~~~~~~~~~
 
diff --git a/tests/db_functions/comparison/test_collate.py b/tests/db_functions/comparison/test_collate.py
new file mode 100644
index 0000000000..6507c904bc
--- /dev/null
+++ b/tests/db_functions/comparison/test_collate.py
@@ -0,0 +1,58 @@
+from django.db import connection
+from django.db.models import F, Value
+from django.db.models.functions import Collate
+from django.test import TestCase
+
+from ..models import Author
+
+
+class CollateTests(TestCase):
+    @classmethod
+    def setUpTestData(cls):
+        cls.author1 = Author.objects.create(alias='a', name='Jones 1')
+        cls.author2 = Author.objects.create(alias='A', name='Jones 2')
+
+    def test_collate_filter_ci(self):
+        collation = connection.features.test_collations.get('ci')
+        if not collation:
+            self.skipTest(
+                'This backend does not support case-insensitive collations.'
+            )
+        qs = Author.objects.filter(alias=Collate(Value('a'), collation))
+        self.assertEqual(qs.count(), 2)
+
+    def test_collate_order_by_cs(self):
+        collation = connection.features.test_collations.get('cs')
+        if not collation:
+            self.skipTest(
+                'This backend does not support case-sensitive collations.'
+            )
+        qs = Author.objects.order_by(Collate('alias', collation))
+        # Case-sensitive ordering is expected to put 'A' before 'a'.
+        self.assertSequenceEqual(qs, [self.author2, self.author1])
+
+    def test_language_collation_order_by(self):
+        collation = connection.features.test_collations.get('swedish-ci')
+        if not collation:
+            self.skipTest('This backend does not support language collations.')
+        author3 = Author.objects.create(alias='O', name='Jones')
+        author4 = Author.objects.create(alias='Ö', name='Jones')
+        author5 = Author.objects.create(alias='P', name='Jones')
+        qs = Author.objects.order_by(Collate(F('alias'), collation), 'name')
+        # 'a'/'A' tie on alias and fall back to name; 'Ö' is expected after 'P'.
+        self.assertSequenceEqual(
+            qs,
+            [self.author1, self.author2, author3, author5, author4],
+        )
+
+    def test_invalid_collation(self):
+        tests = [
+            None,
+            '',
+            'et-x-icu" OR ',
+            '"schema"."collation"',
+        ]
+        msg = "Invalid collation name: %r."
+        for value in tests:
+            with self.subTest(value), self.assertRaisesMessage(ValueError, msg % value):
+                Collate(F('alias'), value)