mirror of
				https://github.com/django/django.git
				synced 2025-10-25 06:36:07 +00:00 
			
		
		
		
	Fixed #24938 -- Added PostgreSQL trigram support.
This commit is contained in:
		
				
					committed by
					
						 Tim Graham
						Tim Graham
					
				
			
			
				
	
			
			
			
						parent
						
							d7334b405f
						
					
				
				
					commit
					1962a96a30
				
			| @@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created | ||||
| from django.db.models import CharField, TextField | ||||
| from django.utils.translation import ugettext_lazy as _ | ||||
|  | ||||
| from .lookups import SearchLookup, Unaccent | ||||
| from .lookups import SearchLookup, TrigramSimilar, Unaccent | ||||
| from .signals import register_hstore_handler | ||||
|  | ||||
|  | ||||
| @@ -17,3 +17,5 @@ class PostgresConfig(AppConfig): | ||||
|         TextField.register_lookup(Unaccent) | ||||
|         CharField.register_lookup(SearchLookup) | ||||
|         TextField.register_lookup(SearchLookup) | ||||
|         CharField.register_lookup(TrigramSimilar) | ||||
|         TextField.register_lookup(TrigramSimilar) | ||||
|   | ||||
| @@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact): | ||||
|             self.lhs = SearchVector(self.lhs) | ||||
|         lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection) | ||||
|         return lhs, lhs_params | ||||
|  | ||||
|  | ||||
| class TrigramSimilar(PostgresSimpleLookup): | ||||
|     lookup_name = 'trigram_similar' | ||||
|     operator = '%%' | ||||
|   | ||||
| @@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension): | ||||
|  | ||||
|     def __init__(self): | ||||
|         self.name = 'unaccent' | ||||
|  | ||||
|  | ||||
| class TrigramExtension(CreateExtension): | ||||
|  | ||||
|     def __init__(self): | ||||
|         self.name = 'pg_trgm' | ||||
|   | ||||
| @@ -185,3 +185,19 @@ class SearchRank(Func): | ||||
|  | ||||
|  | ||||
| SearchVectorField.register_lookup(SearchVectorExact) | ||||
|  | ||||
|  | ||||
| class TrigramBase(Func): | ||||
|     def __init__(self, expression, string, **extra): | ||||
|         if not hasattr(string, 'resolve_expression'): | ||||
|             string = Value(string) | ||||
|         super(TrigramBase, self).__init__(expression, string, output_field=FloatField(), **extra) | ||||
|  | ||||
|  | ||||
| class TrigramSimilarity(TrigramBase): | ||||
|     function = 'SIMILARITY' | ||||
|  | ||||
|  | ||||
| class TrigramDistance(TrigramBase): | ||||
|     function = '' | ||||
|     arg_joiner = ' <-> ' | ||||
|   | ||||
| @@ -2,6 +2,32 @@ | ||||
| PostgreSQL specific lookups | ||||
| =========================== | ||||
|  | ||||
| Trigram similarity | ||||
| ================== | ||||
|  | ||||
| .. fieldlookup:: trigram_similar | ||||
|  | ||||
| .. versionadded:: 1.10 | ||||
|  | ||||
| The ``trigram_similar`` lookup allows you to perform trigram lookups, | ||||
| measuring the number of trigrams (three consecutive characters) shared, using a | ||||
| dedicated PostgreSQL extension. A trigram lookup is given an expression and | ||||
| returns results that have a similarity measurement greater than the current | ||||
| similarity threshold. | ||||
|  | ||||
| To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS` | ||||
| and activate the `pg_trgm extension | ||||
| <http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on | ||||
| PostgreSQL. You can install the extension using the | ||||
| :class:`~django.contrib.postgres.operations.TrigramExtension` migration | ||||
| operation. | ||||
|  | ||||
| The ``trigram_similar`` lookup can be used on | ||||
| :class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`:: | ||||
|  | ||||
|     >>> City.objects.filter(name__trigram_similar="Middlesborough") | ||||
|     [<City: Middlesbrough>] | ||||
|  | ||||
| ``Unaccent`` | ||||
| ============ | ||||
|  | ||||
|   | ||||
| @@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module. | ||||
|     which will install the ``hstore`` extension and also immediately set up the | ||||
|     connection to interpret hstore data. | ||||
|  | ||||
| ``TrigramExtension`` | ||||
| ==================== | ||||
|  | ||||
| .. class:: TrigramExtension() | ||||
|  | ||||
|     .. versionadded:: 1.10 | ||||
|  | ||||
|     A subclass of :class:`~django.contrib.postgres.operations.CreateExtension` | ||||
|     that installs the ``pg_trgm`` extension. | ||||
|  | ||||
| ``UnaccentExtension`` | ||||
| ===================== | ||||
|  | ||||
|   | ||||
| @@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``:: | ||||
|     [<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>] | ||||
|  | ||||
| .. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS | ||||
|  | ||||
| Trigram similarity | ||||
| ================== | ||||
|  | ||||
| Another approach to searching is trigram similarity. A trigram is a group of | ||||
| three consecutive characters. In addition to the :lookup:`trigram_similar` | ||||
| lookup, you can use a couple of other expressions. | ||||
|  | ||||
| To use them, you need to activate the `pg_trgm extension | ||||
| <http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on | ||||
| PostgreSQL. You can install it using the | ||||
| :class:`~django.contrib.postgres.operations.TrigramExtension` migration | ||||
| operation. | ||||
|  | ||||
| ``TrigramSimilarity`` | ||||
| --------------------- | ||||
|  | ||||
| .. class:: TrigramSimilarity(expression, string, **extra) | ||||
|  | ||||
| .. versionadded:: 1.10 | ||||
|  | ||||
| Accepts a field name or expression, and a string or expression. Returns the | ||||
| trigram similarity between the two arguments. | ||||
|  | ||||
| Usage example:: | ||||
|  | ||||
|     >>> from django.contrib.postgres.search import TrigramSimilarity | ||||
|     >>> Author.objects.create(name='Katy Stevens') | ||||
|     >>> Author.objects.create(name='Stephen Keats') | ||||
|     >>> test = 'Katie Stephens' | ||||
|     >>> Author.objects.annotate( | ||||
|     ...     similarity=TrigramSimilarity('name', test), | ||||
|     ... ).filter(similarity__gt=0.3).order_by('-similarity') | ||||
|     [<Author: Katy Stevens>, <Author: Stephen Keats>] | ||||
|  | ||||
| ``TrigramDistance`` | ||||
| ------------------- | ||||
|  | ||||
| .. class:: TrigramDistance(expression, string, **extra) | ||||
|  | ||||
| .. versionadded:: 1.10 | ||||
|  | ||||
| Accepts a field name or expression, and a string or expression. Returns the | ||||
| trigram distance between the two arguments. | ||||
|  | ||||
| Usage example:: | ||||
|  | ||||
|     >>> from django.contrib.postgres.search import TrigramDistance | ||||
|     >>> Author.objects.create(name='Katy Stevens') | ||||
|     >>> Author.objects.create(name='Stephen Keats') | ||||
|     >>> test = 'Katie Stephens' | ||||
|     >>> Author.objects.annotate( | ||||
|     ...     distance=TrigramDistance('name', test), | ||||
|     ... ).filter(distance__lte=0.7).order_by('distance') | ||||
|     [<Author: Katy Stevens>, <Author: Stephen Keats>] | ||||
|   | ||||
| @@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational | ||||
| database, combine the searches with other lookups, use different language | ||||
| configurations and weightings, and rank the results by relevance. | ||||
|  | ||||
| It also now includes trigram support, using the :lookup:`trigram_similar` | ||||
| lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and | ||||
| :class:`~django.contrib.postgres.search.TrigramDistance` expressions. | ||||
|  | ||||
| Minor features | ||||
| -------------- | ||||
|  | ||||
|   | ||||
| @@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`:: | ||||
| This shows another issue, where we are matching against a different spelling of | ||||
| the name. In this case we have an asymmetry though - a search for ``Helen`` | ||||
| will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option | ||||
| would be to use a trigram comparison, which compares sequences of letters. | ||||
| would be to use a :lookup:`trigram_similar` comparison, which compares | ||||
| sequences of letters. | ||||
|  | ||||
| For example:: | ||||
|  | ||||
|     >>> Author.objects.filter(name__unaccent__lower__trigram='Hélène') | ||||
|     >>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène') | ||||
|     [<Author: Helen Mirren>, <Author: Hélène Joy>] | ||||
|  | ||||
| Now we have a different problem - the longer name of "Helena Bonham Carter" | ||||
|   | ||||
| @@ -5,12 +5,13 @@ from django.db import migrations | ||||
|  | ||||
| try: | ||||
|     from django.contrib.postgres.operations import ( | ||||
|         CreateExtension, HStoreExtension, UnaccentExtension, | ||||
|         CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension, | ||||
|     ) | ||||
| except ImportError: | ||||
|     from django.test import mock | ||||
|     CreateExtension = mock.Mock() | ||||
|     HStoreExtension = mock.Mock() | ||||
|     TrigramExtension = mock.Mock() | ||||
|     UnaccentExtension = mock.Mock() | ||||
|  | ||||
|  | ||||
| @@ -21,5 +22,6 @@ class Migration(migrations.Migration): | ||||
|         # dash in its name. | ||||
|         CreateExtension('uuid-ossp'), | ||||
|         HStoreExtension(), | ||||
|         TrigramExtension(), | ||||
|         UnaccentExtension(), | ||||
|     ] | ||||
|   | ||||
							
								
								
									
										53
									
								
								tests/postgres_tests/test_trigram.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								tests/postgres_tests/test_trigram.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity | ||||
| from django.test import modify_settings | ||||
|  | ||||
| from . import PostgreSQLTestCase | ||||
| from .models import CharFieldModel, TextFieldModel | ||||
|  | ||||
|  | ||||
| @modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'}) | ||||
| class TrigramTest(PostgreSQLTestCase): | ||||
|     Model = CharFieldModel | ||||
|  | ||||
|     @classmethod | ||||
|     def setUpTestData(cls): | ||||
|         cls.Model.objects.bulk_create([ | ||||
|             cls.Model(field='Matthew'), | ||||
|             cls.Model(field='Cat sat on mat.'), | ||||
|             cls.Model(field='Dog sat on rug.'), | ||||
|         ]) | ||||
|  | ||||
|     def test_trigram_search(self): | ||||
|         self.assertQuerysetEqual( | ||||
|             self.Model.objects.filter(field__trigram_similar='Mathew'), | ||||
|             ['Matthew'], | ||||
|             transform=lambda instance: instance.field, | ||||
|         ) | ||||
|  | ||||
|     def test_trigram_similarity(self): | ||||
|         search = 'Bat sat on cat.' | ||||
|         self.assertQuerysetEqual( | ||||
|             self.Model.objects.filter( | ||||
|                 field__trigram_similar=search, | ||||
|             ).annotate(similarity=TrigramSimilarity('field', search)).order_by('-similarity'), | ||||
|             [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)], | ||||
|             transform=lambda instance: (instance.field, instance.similarity), | ||||
|             ordered=True, | ||||
|         ) | ||||
|  | ||||
|     def test_trigram_similarity_alternate(self): | ||||
|         self.assertQuerysetEqual( | ||||
|             self.Model.objects.annotate( | ||||
|                 distance=TrigramDistance('field', 'Bat sat on cat.'), | ||||
|             ).filter(distance__lte=0.7).order_by('distance'), | ||||
|             [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)], | ||||
|             transform=lambda instance: (instance.field, instance.distance), | ||||
|             ordered=True, | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class TrigramTextFieldTest(TrigramTest): | ||||
|     """ | ||||
|     TextField has the same behavior as CharField regarding trigram lookups. | ||||
|     """ | ||||
|     Model = TextFieldModel | ||||
		Reference in New Issue
	
	Block a user