1
0
mirror of https://github.com/django/django.git synced 2025-03-29 02:30:48 +00:00
django/django/utils/encoding.py
Malcolm Tredinnick 356662cf74 Implemented auto-escaping of variable output in templates. Fully controllable by template authors and it's possible to write filters and templates that simulataneously work in both auto-escaped and non-auto-escaped environments if you need to. Fixed #2359
See documentation in templates.txt and templates_python.txt for how everything
works.

Backwards incompatible if you're inserting raw HTML output via template variables.

Based on an original design from Simon Willison and with debugging help from Michael Radziej.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@6671 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2007-11-14 12:58:53 +00:00

103 lines
3.5 KiB
Python

import types
import urllib
import datetime
from django.utils.functional import Promise
from django.utils.safestring import SafeData, mark_safe
class DjangoUnicodeDecodeError(UnicodeDecodeError):
def __init__(self, obj, *args):
self.obj = obj
UnicodeDecodeError.__init__(self, *args)
def __str__(self):
original = UnicodeDecodeError.__str__(self)
return '%s. You passed in %r (%s)' % (original, self.obj,
type(self.obj))
class StrAndUnicode(object):
"""
A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.
Useful as a mix-in.
"""
def __str__(self):
return self.__unicode__().encode('utf-8')
def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
Returns a unicode object representing 's'. Treats bytestrings using the
'encoding' codec.
If strings_only is True, don't convert (some) non-string-like objects.
"""
if isinstance(s, Promise):
# The input is the result of a gettext_lazy() call.
return s
return force_unicode(s, encoding, strings_only, errors)
def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
Similar to smart_unicode, except that lazy instances are resolved to
strings, rather than kept as lazy objects.
If strings_only is True, don't convert (some) non-string-like objects.
"""
if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)):
return s
try:
if not isinstance(s, basestring,):
if hasattr(s, '__unicode__'):
s = unicode(s)
else:
s = unicode(str(s), encoding, errors)
elif not isinstance(s, unicode):
# Note: We use .decode() here, instead of unicode(s, encoding,
# errors), so that if s is a SafeString, it ends up being a
# SafeUnicode at the end.
s = s.decode(encoding, errors)
except UnicodeDecodeError, e:
raise DjangoUnicodeDecodeError(s, *e.args)
return s
def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
"""
Returns a bytestring version of 's', encoded as specified in 'encoding'.
If strings_only is True, don't convert (some) non-string-like objects.
"""
if strings_only and isinstance(s, (types.NoneType, int)):
return s
if isinstance(s, Promise):
return unicode(s).encode(encoding, errors)
elif not isinstance(s, basestring):
try:
return str(s)
except UnicodeEncodeError:
return unicode(s).encode(encoding, errors)
elif isinstance(s, unicode):
return s.encode(encoding, errors)
elif s and encoding != 'utf-8':
return s.decode('utf-8', errors).encode(encoding, errors)
else:
return s
def iri_to_uri(iri):
"""
Convert an Internationalized Resource Identifier (IRI) portion to a URI
portion that is suitable for inclusion in a URL.
This is the algorithm from section 3.1 of RFC 3987. However, since we are
assuming input is either UTF-8 or unicode already, we can simplify things a
little from the full method.
Returns an ASCII string containing the encoded result.
"""
# The list of safe characters here is constructed from the printable ASCII
# characters that are not explicitly excluded by the list at the end of
# section 3.1 of RFC 3987.
if iri is None:
return iri
return urllib.quote(smart_str(iri), safe='/#%[]=:;$&()+,!?')