1
0
mirror of https://github.com/django/django.git synced 2025-10-25 06:36:07 +00:00

[py3] Ported django.utils.encoding.

* Renamed smart_unicode to smart_text (but kept the old name under
  Python 2 for backwards compatibility).
* Renamed smart_str to smart_bytes.
* Re-introduced smart_str as an alias for smart_text under Python 3
  and smart_bytes under Python 2 (which is backwards compatible).
  Thus smart_str always returns a str objects.
* Used the new smart_str in a few places where both Python 2 and 3
  want a str.
This commit is contained in:
Aymeric Augustin
2012-07-21 10:00:10 +02:00
parent ee191715ea
commit c5ef65bcf3
125 changed files with 629 additions and 583 deletions

View File

@@ -24,9 +24,13 @@ class DjangoUnicodeDecodeError(UnicodeDecodeError):
class StrAndUnicode(object):
"""
A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.
A class that derives __str__ from __unicode__.
Useful as a mix-in.
On Python 2, __str__ returns the output of __unicode__ encoded as a UTF-8
bytestring. On Python 3, __str__ returns the output of __unicode__.
Useful as a mix-in. If you support Python 2 and 3 with a single code base,
you can inherit this mix-in and just define __unicode__.
"""
if six.PY3:
def __str__(self):
@@ -35,37 +39,36 @@ class StrAndUnicode(object):
def __str__(self):
return self.__unicode__().encode('utf-8')
def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
def smart_text(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
Returns a unicode object representing 's'. Treats bytestrings using the
'encoding' codec.
Returns a text object representing 's' -- unicode on Python 2 and str on
Python 3. Treats bytestrings using the 'encoding' codec.
If strings_only is True, don't convert (some) non-string-like objects.
"""
if isinstance(s, Promise):
# The input is the result of a gettext_lazy() call.
return s
return force_unicode(s, encoding, strings_only, errors)
return force_text(s, encoding, strings_only, errors)
def is_protected_type(obj):
"""Determine if the object instance is of a protected type.
Objects of protected types are preserved as-is when passed to
force_unicode(strings_only=True).
force_text(strings_only=True).
"""
return isinstance(obj, six.integer_types + (type(None), float, Decimal,
datetime.datetime, datetime.date, datetime.time))
def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
Similar to smart_unicode, except that lazy instances are resolved to
Similar to smart_text, except that lazy instances are resolved to
strings, rather than kept as lazy objects.
If strings_only is True, don't convert (some) non-string-like objects.
"""
# Handle the common case first, saves 30-40% in performance when s
# is an instance of unicode. This function gets called often in that
# setting.
# Handle the common case first, saves 30-40% when s is an instance of
# six.text_type. This function gets called often in that setting.
if isinstance(s, six.text_type):
return s
if strings_only and is_protected_type(s):
@@ -92,7 +95,7 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
# without raising a further exception. We do an
# approximation to what the Exception's standard str()
# output should be.
s = ' '.join([force_unicode(arg, encoding, strings_only,
s = ' '.join([force_text(arg, encoding, strings_only,
errors) for arg in s])
else:
# Note: We use .decode() here, instead of six.text_type(s, encoding,
@@ -108,21 +111,26 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
# working unicode method. Try to handle this without raising a
# further exception by individually forcing the exception args
# to unicode.
s = ' '.join([force_unicode(arg, encoding, strings_only,
s = ' '.join([force_text(arg, encoding, strings_only,
errors) for arg in s])
return s
def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
Returns a bytestring version of 's', encoded as specified in 'encoding'.
If strings_only is True, don't convert (some) non-string-like objects.
"""
if isinstance(s, bytes):
if encoding == 'utf-8':
return s
else:
return s.decode('utf-8', errors).encode(encoding, errors)
if strings_only and (s is None or isinstance(s, int)):
return s
if isinstance(s, Promise):
return six.text_type(s).encode(encoding, errors)
elif not isinstance(s, six.string_types):
if not isinstance(s, six.string_types):
try:
if six.PY3:
return six.text_type(s).encode(encoding)
@@ -133,15 +141,25 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
# An Exception subclass containing non-ASCII data that doesn't
# know how to print itself properly. We shouldn't raise a
# further exception.
return ' '.join([smart_str(arg, encoding, strings_only,
return ' '.join([smart_bytes(arg, encoding, strings_only,
errors) for arg in s])
return six.text_type(s).encode(encoding, errors)
elif isinstance(s, six.text_type):
return s.encode(encoding, errors)
elif s and encoding != 'utf-8':
return s.decode('utf-8', errors).encode(encoding, errors)
else:
return s
return s.encode(encoding, errors)
if six.PY3:
smart_str = smart_text
else:
smart_str = smart_bytes
# backwards compatibility for Python 2
smart_unicode = smart_text
force_unicode = force_text
smart_str.__doc__ = """\
Apply smart_text in Python 3 and smart_bytes in Python 2.
This is suitable for writing to sys.stdout (for instance).
"""
def iri_to_uri(iri):
"""
@@ -168,7 +186,7 @@ def iri_to_uri(iri):
# converted.
if iri is None:
return iri
return quote(smart_str(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
return quote(smart_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
def filepath_to_uri(path):
"""Convert an file system path to a URI portion that is suitable for
@@ -187,7 +205,7 @@ def filepath_to_uri(path):
return path
# I know about `os.sep` and `os.altsep` but I want to leave
# some flexibility for hardcoding separators.
return quote(smart_str(path).replace("\\", "/"), safe=b"/~!*()'")
return quote(smart_bytes(path).replace("\\", "/"), safe=b"/~!*()'")
# The encoding of the default system locale but falls back to the
# given fallback encoding if the encoding is unsupported by python or could