mirror of
https://github.com/django/django.git
synced 2025-10-25 06:36:07 +00:00
[py3] Ported django.utils.encoding.
* Renamed smart_unicode to smart_text (but kept the old name under Python 2 for backwards compatibility). * Renamed smart_str to smart_bytes. * Re-introduced smart_str as an alias for smart_text under Python 3 and smart_bytes under Python 2 (which is backwards compatible). Thus smart_str always returns a str object. * Used the new smart_str in a few places where both Python 2 and 3 want a str.
This commit is contained in:
@@ -24,9 +24,13 @@ class DjangoUnicodeDecodeError(UnicodeDecodeError):
|
||||
|
||||
class StrAndUnicode(object):
|
||||
"""
|
||||
A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.
|
||||
A class that derives __str__ from __unicode__.
|
||||
|
||||
Useful as a mix-in.
|
||||
On Python 2, __str__ returns the output of __unicode__ encoded as a UTF-8
|
||||
bytestring. On Python 3, __str__ returns the output of __unicode__.
|
||||
|
||||
Useful as a mix-in. If you support Python 2 and 3 with a single code base,
|
||||
you can inherit this mix-in and just define __unicode__.
|
||||
"""
|
||||
if six.PY3:
|
||||
def __str__(self):
|
||||
@@ -35,37 +39,36 @@ class StrAndUnicode(object):
|
||||
def __str__(self):
|
||||
return self.__unicode__().encode('utf-8')
|
||||
|
||||
def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
def smart_text(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
"""
|
||||
Returns a unicode object representing 's'. Treats bytestrings using the
|
||||
'encoding' codec.
|
||||
Returns a text object representing 's' -- unicode on Python 2 and str on
|
||||
Python 3. Treats bytestrings using the 'encoding' codec.
|
||||
|
||||
If strings_only is True, don't convert (some) non-string-like objects.
|
||||
"""
|
||||
if isinstance(s, Promise):
|
||||
# The input is the result of a gettext_lazy() call.
|
||||
return s
|
||||
return force_unicode(s, encoding, strings_only, errors)
|
||||
return force_text(s, encoding, strings_only, errors)
|
||||
|
||||
def is_protected_type(obj):
|
||||
"""Determine if the object instance is of a protected type.
|
||||
|
||||
Objects of protected types are preserved as-is when passed to
|
||||
force_unicode(strings_only=True).
|
||||
force_text(strings_only=True).
|
||||
"""
|
||||
return isinstance(obj, six.integer_types + (type(None), float, Decimal,
|
||||
datetime.datetime, datetime.date, datetime.time))
|
||||
|
||||
def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
def force_text(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
"""
|
||||
Similar to smart_unicode, except that lazy instances are resolved to
|
||||
Similar to smart_text, except that lazy instances are resolved to
|
||||
strings, rather than kept as lazy objects.
|
||||
|
||||
If strings_only is True, don't convert (some) non-string-like objects.
|
||||
"""
|
||||
# Handle the common case first, saves 30-40% in performance when s
|
||||
# is an instance of unicode. This function gets called often in that
|
||||
# setting.
|
||||
# Handle the common case first, saves 30-40% when s is an instance of
|
||||
# six.text_type. This function gets called often in that setting.
|
||||
if isinstance(s, six.text_type):
|
||||
return s
|
||||
if strings_only and is_protected_type(s):
|
||||
@@ -92,7 +95,7 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
# without raising a further exception. We do an
|
||||
# approximation to what the Exception's standard str()
|
||||
# output should be.
|
||||
s = ' '.join([force_unicode(arg, encoding, strings_only,
|
||||
s = ' '.join([force_text(arg, encoding, strings_only,
|
||||
errors) for arg in s])
|
||||
else:
|
||||
# Note: We use .decode() here, instead of six.text_type(s, encoding,
|
||||
@@ -108,21 +111,26 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
# working unicode method. Try to handle this without raising a
|
||||
# further exception by individually forcing the exception args
|
||||
# to unicode.
|
||||
s = ' '.join([force_unicode(arg, encoding, strings_only,
|
||||
s = ' '.join([force_text(arg, encoding, strings_only,
|
||||
errors) for arg in s])
|
||||
return s
|
||||
|
||||
def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
def smart_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
"""
|
||||
Returns a bytestring version of 's', encoded as specified in 'encoding'.
|
||||
|
||||
If strings_only is True, don't convert (some) non-string-like objects.
|
||||
"""
|
||||
if isinstance(s, bytes):
|
||||
if encoding == 'utf-8':
|
||||
return s
|
||||
else:
|
||||
return s.decode('utf-8', errors).encode(encoding, errors)
|
||||
if strings_only and (s is None or isinstance(s, int)):
|
||||
return s
|
||||
if isinstance(s, Promise):
|
||||
return six.text_type(s).encode(encoding, errors)
|
||||
elif not isinstance(s, six.string_types):
|
||||
if not isinstance(s, six.string_types):
|
||||
try:
|
||||
if six.PY3:
|
||||
return six.text_type(s).encode(encoding)
|
||||
@@ -133,15 +141,25 @@ def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
|
||||
# An Exception subclass containing non-ASCII data that doesn't
|
||||
# know how to print itself properly. We shouldn't raise a
|
||||
# further exception.
|
||||
return ' '.join([smart_str(arg, encoding, strings_only,
|
||||
return ' '.join([smart_bytes(arg, encoding, strings_only,
|
||||
errors) for arg in s])
|
||||
return six.text_type(s).encode(encoding, errors)
|
||||
elif isinstance(s, six.text_type):
|
||||
return s.encode(encoding, errors)
|
||||
elif s and encoding != 'utf-8':
|
||||
return s.decode('utf-8', errors).encode(encoding, errors)
|
||||
else:
|
||||
return s
|
||||
return s.encode(encoding, errors)
|
||||
|
||||
if six.PY3:
|
||||
smart_str = smart_text
|
||||
else:
|
||||
smart_str = smart_bytes
|
||||
# backwards compatibility for Python 2
|
||||
smart_unicode = smart_text
|
||||
force_unicode = force_text
|
||||
|
||||
smart_str.__doc__ = """\
|
||||
Apply smart_text in Python 3 and smart_bytes in Python 2.
|
||||
|
||||
This is suitable for writing to sys.stdout (for instance).
|
||||
"""
|
||||
|
||||
def iri_to_uri(iri):
|
||||
"""
|
||||
@@ -168,7 +186,7 @@ def iri_to_uri(iri):
|
||||
# converted.
|
||||
if iri is None:
|
||||
return iri
|
||||
return quote(smart_str(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
|
||||
return quote(smart_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
|
||||
|
||||
def filepath_to_uri(path):
|
||||
"""Convert an file system path to a URI portion that is suitable for
|
||||
@@ -187,7 +205,7 @@ def filepath_to_uri(path):
|
||||
return path
|
||||
# I know about `os.sep` and `os.altsep` but I want to leave
|
||||
# some flexibility for hardcoding separators.
|
||||
return quote(smart_str(path).replace("\\", "/"), safe=b"/~!*()'")
|
||||
return quote(smart_bytes(path).replace("\\", "/"), safe=b"/~!*()'")
|
||||
|
||||
# The encoding of the default system locale but falls back to the
|
||||
# given fallback encoding if the encoding is unsupported by python or could
|
||||
|
||||
Reference in New Issue
Block a user