[py3] Ported django.utils.encoding.

* Renamed smart_unicode to smart_text (but kept the old name under Python 2 for backwards compatibility). * Renamed smart_str to smart_bytes. * Re-introduced smart_str as an alias for smart_text under Python 3 and smart_bytes under Python 2 (which is backwards compatible). Thus smart_str always returns a str objects. * Used the new smart_str in a few places where both Python 2 and 3 want a str.
2025-10-25 06:36:07 +00:00 · 2012-07-21 10:00:10 +02:00
parent ee191715ea
commit c5ef65bcf3
125 changed files with 629 additions and 583 deletions
--- a/django/utils/encoding.py
+++ b/django/utils/encoding.py
@@ -24,9 +24,13 @@ class DjangoUnicodeDecodeError(UnicodeDecodeError):

 class StrAndUnicode(object):
    """
-    A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.
+    A class that derives __str__ from __unicode__.

-    Useful as a mix-in.
+    On Python 2, __str__ returns the output of __unicode__ encoded as a UTF-8
+    bytestring. On Python 3, __str__ returns the output of __unicode__.
+
+    Useful as a mix-in. If you support Python 2 and 3 with a single code base,
+    you can inherit this mix-in and just define __unicode__.
    """
    if six.PY3:
        def __str__(self):
@@ -35,37 +39,36 @@ class StrAndUnicode(object):
        def __str__(self):
            return self.__unicode__().encode('utf-8')

-def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
+def smart_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
-    Returns a unicode object representing 's'. Treats bytestrings using the
-    'encoding' codec.
+    Returns a text object representing 's' -- unicode on Python 2 and str on
+    Python 3. Treats bytestrings using the 'encoding' codec.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if isinstance(s, Promise):
        # The input is the result of a gettext_lazy() call.
        return s
-    return force_unicode(s, encoding, strings_only, errors)
+    return force_text(s, encoding, strings_only, errors)

 def is_protected_type(obj):
    """Determine if the object instance is of a protected type.

    Objects of protected types are preserved as-is when passed to
-    force_unicode(strings_only=True).
+    force_text(strings_only=True).
    """
    return isinstance(obj, six.integer_types + (type(None), float, Decimal,
        datetime.datetime, datetime.date, datetime.time))

-def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
+def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
-    Similar to smart_unicode, except that lazy instances are resolved to
+    Similar to smart_text, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
-    # Handle the common case first, saves 30-40% in performance when s
-    # is an instance of unicode. This function gets called often in that
-    # setting.
+    # Handle the common case first, saves 30-40% when s is an instance of
+    # six.text_type. This function gets called often in that setting.
    if isinstance(s, six.text_type):
        return s
    if strings_only and is_protected_type(s):
@@ -92,7 +95,7 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
                    # without raising a further exception. We do an
                    # approximation to what the Exception's standard str()
                    # output should be.
-                    s = ' '.join([force_unicode(arg, encoding, strings_only,
+                    s = ' '.join([force_text(arg, encoding, strings_only,
                            errors) for arg in s])
        else:
            # Note: We use .decode() here, instead of six.text_type(s, encoding,
@@ -108,21 +111,26 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
            # working unicode method. Try to handle this without raising a
            # further exception by individually forcing the exception args
            # to unicode.
-            s = ' '.join([force_unicode(arg, encoding, strings_only,
+            s = ' '.join([force_text(arg, encoding, strings_only,
                    errors) for arg in s])
    return s

-def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
+    if isinstance(s, bytes):
+        if encoding == 'utf-8':
+            return s
+        else:
+            return s.decode('utf-8', errors).encode(encoding, errors)
    if strings_only and (s is None or isinstance(s, int)):
        return s
    if isinstance(s, Promise):
        return six.text_type(s).encode(encoding, errors)
-    elif not isinstance(s, six.string_types):
+    if not isinstance(s, six.string_types):
        try:
            if six.PY3:
                return six.text_type(s).encode(encoding)
@@ -133,15 +141,25 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
                # An Exception subclass containing non-ASCII data that doesn't
                # know how to print itself properly. We shouldn't raise a
                # further exception.
-                return ' '.join([smart_str(arg, encoding, strings_only,
+                return ' '.join([smart_bytes(arg, encoding, strings_only,
                        errors) for arg in s])
            return six.text_type(s).encode(encoding, errors)
-    elif isinstance(s, six.text_type):
-        return s.encode(encoding, errors)
-    elif s and encoding != 'utf-8':
-        return s.decode('utf-8', errors).encode(encoding, errors)
    else:
-        return s
+        return s.encode(encoding, errors)
+
+if six.PY3:
+    smart_str = smart_text
+else:
+    smart_str = smart_bytes
+    # backwards compatibility for Python 2
+    smart_unicode = smart_text
+    force_unicode = force_text
+
+smart_str.__doc__ = """\
+Apply smart_text in Python 3 and smart_bytes in Python 2.
+
+This is suitable for writing to sys.stdout (for instance).
+"""

 def iri_to_uri(iri):
    """
@@ -168,7 +186,7 @@ def iri_to_uri(iri):
    # converted.
    if iri is None:
        return iri
-    return quote(smart_str(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
+    return quote(smart_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")

 def filepath_to_uri(path):
    """Convert an file system path to a URI portion that is suitable for
@@ -187,7 +205,7 @@ def filepath_to_uri(path):
        return path
    # I know about `os.sep` and `os.altsep` but I want to leave
    # some flexibility for hardcoding separators.
-    return quote(smart_str(path).replace("\\", "/"), safe=b"/~!*()'")
+    return quote(smart_bytes(path).replace("\\", "/"), safe=b"/~!*()'")

 # The encoding of the default system locale but falls back to the
 # given fallback encoding if the encoding is unsupported by python or could