diff --git a/django/core/validators.py b/django/core/validators.py
index 715d81bdba..b3b76f25f9 100644
--- a/django/core/validators.py
+++ b/django/core/validators.py
@@ -241,7 +241,7 @@ validate_slug = RegexValidator(
     'invalid'
 )
 
-slug_unicode_re = _lazy_re_compile(r'^[-\w]+\Z', re.U)
+slug_unicode_re = _lazy_re_compile(r'^[-\w]+\Z')
 validate_unicode_slug = RegexValidator(
     slug_unicode_re,
     _("Enter a valid 'slug' consisting of Unicode letters, numbers, underscores, or hyphens."),
diff --git a/django/db/migrations/serializer.py b/django/db/migrations/serializer.py
index eff3ebe0d7..e15ffd87b5 100644
--- a/django/db/migrations/serializer.py
+++ b/django/db/migrations/serializer.py
@@ -5,6 +5,7 @@ import decimal
 import enum
 import functools
 import math
+import re
 import types
 import uuid
 from importlib import import_module
@@ -241,11 +242,16 @@ class RegexSerializer(BaseSerializer):
     def serialize(self):
         imports = {"import re"}
         regex_pattern, pattern_imports = serializer_factory(self.value.pattern).serialize()
-        regex_flags, flag_imports = serializer_factory(self.value.flags).serialize()
+        # Turn off default implicit flags (e.g. re.U) because regexes with the
+        # same implicit and explicit flags aren't equal. Mask rather than XOR:
+        # XOR would switch re.U *on* for re.ASCII or bytes patterns, which
+        # don't carry it, and the serialized re.compile() call would then fail.
+        flags = self.value.flags & ~re.compile('').flags
+        regex_flags, flag_imports = serializer_factory(flags).serialize()
         imports.update(pattern_imports)
         imports.update(flag_imports)
         args = [regex_pattern]
-        if self.value.flags:
+        if flags:
             args.append(regex_flags)
         return "re.compile(%s)" % ', '.join(args), imports
 
diff --git a/django/forms/fields.py b/django/forms/fields.py
index b7846d43a6..b88f3ea61a 100644
--- a/django/forms/fields.py
+++ b/django/forms/fields.py
@@ -522,7 +522,7 @@ class RegexField(CharField):
 
     def _set_regex(self, regex):
         if isinstance(regex, str):
-            regex = re.compile(regex, re.UNICODE)
+            regex = re.compile(regex)
         self._regex = regex
         if hasattr(self, '_regex_validator') and self._regex_validator in self.validators:
             self.validators.remove(self._regex_validator)
diff --git a/django/template/base.py b/django/template/base.py
index f6259ec7cf..b4d129c0a4 100644
--- a/django/template/base.py
+++ b/django/template/base.py
@@ -621,7 +621,7 @@ filter_raw_string = r"""
     'arg_sep': re.escape(FILTER_ARGUMENT_SEPARATOR),
 }
 
-filter_re = re.compile(filter_raw_string, re.UNICODE | re.VERBOSE)
+filter_re = re.compile(filter_raw_string, re.VERBOSE)
 
 
 class FilterExpression:
diff --git a/django/urls/resolvers.py b/django/urls/resolvers.py
index 376aedc3c1..0dc6b89745 100644
--- a/django/urls/resolvers.py
+++ b/django/urls/resolvers.py
@@ -99,7 +99,7 @@ class LocaleRegexDescriptor:
         Compile and return the given regular expression.
         """
         try:
-            return re.compile(regex, re.UNICODE)
+            return re.compile(regex)
         except re.error as e:
             raise ImproperlyConfigured(
                 '"%s" is not a valid regular expression: %s' % (regex, e)
@@ -453,7 +453,7 @@ class RegexURLResolver(LocaleRegexProvider):
                 # Then, if we have a match, redo the substitution with quoted
                 # arguments in order to return a properly encoded URL.
                 candidate_pat = _prefix.replace('%', '%%') + result
-                if re.search('^%s%s' % (re.escape(_prefix), pattern), candidate_pat % candidate_subs, re.UNICODE):
+                if re.search('^%s%s' % (re.escape(_prefix), pattern), candidate_pat % candidate_subs):
                     # safe characters from `pchar` definition of RFC 3986
                     url = urlquote(candidate_pat % candidate_subs, safe=RFC3986_SUBDELIMS + str('/~:@'))
                     # Don't allow construction of scheme relative urls.
@@ -513,5 +513,5 @@ class LocaleRegexURLResolver(RegexURLResolver):
                 regex_string = ''
             else:
                 regex_string = '^%s/' % language_code
-            self._regex_dict[language_code] = re.compile(regex_string, re.UNICODE)
+            self._regex_dict[language_code] = re.compile(regex_string)
         return self._regex_dict[language_code]
diff --git a/django/utils/text.py b/django/utils/text.py
index 20df82c85b..26a8b859ef 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -19,8 +19,8 @@ def capfirst(x):
 
 
 # Set up regular expressions
-re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S)
-re_chars = re.compile(r'<.*?>|(.)', re.U | re.S)
+re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.S)
+re_chars = re.compile(r'<.*?>|(.)', re.S)
 re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
 re_newlines = re.compile(r'\r\n|\r')  # Used in normalize_newlines
 re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
@@ -417,8 +417,8 @@ def slugify(value, allow_unicode=False):
     value = force_text(value)
     if allow_unicode:
         value = unicodedata.normalize('NFKC', value)
-        value = re.sub(r'[^\w\s-]', '', value, flags=re.U).strip().lower()
-        return mark_safe(re.sub(r'[-\s]+', '-', value, flags=re.U))
+        value = re.sub(r'[^\w\s-]', '', value).strip().lower()
+        return mark_safe(re.sub(r'[-\s]+', '-', value))
     value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
     value = re.sub(r'[^\w\s-]', '', value).strip().lower()
     return mark_safe(re.sub(r'[-\s]+', '-', value))
diff --git a/tests/migrations/test_writer.py b/tests/migrations/test_writer.py
index c1384f6933..1de0fec0aa 100644
--- a/tests/migrations/test_writer.py
+++ b/tests/migrations/test_writer.py
@@ -247,7 +247,7 @@ class WriterTests(SimpleTestCase):
         )
 
     def test_serialize_lazy_objects(self):
-        pattern = re.compile(r'^foo$', re.UNICODE)
+        pattern = re.compile(r'^foo$')
         lazy_pattern = SimpleLazyObject(lambda: pattern)
         self.assertEqual(self.serialize_round_trip(lazy_pattern), pattern)
 
@@ -403,7 +403,7 @@ class WriterTests(SimpleTestCase):
         """
         Make sure compiled regex can be serialized.
         """
-        regex = re.compile(r'^\w+$', re.U)
+        regex = re.compile(r'^\w+$')
         self.assertSerializedEqual(regex)
 
     def test_serialize_class_based_validators(self):
@@ -417,18 +417,18 @@ class WriterTests(SimpleTestCase):
         self.serialize_round_trip(validator)
 
         # Test with a compiled regex.
-        validator = RegexValidator(regex=re.compile(r'^\w+$', re.U))
+        validator = RegexValidator(regex=re.compile(r'^\w+$'))
         string = MigrationWriter.serialize(validator)[0]
-        self.assertEqual(string, "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$', 32))")
+        self.assertEqual(string, "django.core.validators.RegexValidator(regex=re.compile('^\\\\w+$'))")
         self.serialize_round_trip(validator)
 
         # Test a string regex with flag
-        validator = RegexValidator(r'^[0-9]+$', flags=re.U)
+        validator = RegexValidator(r'^[0-9]+$', flags=re.S)
         string = MigrationWriter.serialize(validator)[0]
         if PY36:
-            self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(32))")
+            self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=re.RegexFlag(16))")
         else:
-            self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=32)")
+            self.assertEqual(string, "django.core.validators.RegexValidator('^[0-9]+$', flags=16)")
         self.serialize_round_trip(validator)
 
         # Test message and code