mirror of
https://github.com/django/django.git
synced 2025-10-25 06:36:07 +00:00
Fixed #19508 -- Implemented uri_to_iri as per RFC.
Thanks Loic Bistuer for helping in shaping the patch and Claude Paroz for the review.
This commit is contained in:
committed by
Loic Bistuer
parent
3af5af1a61
commit
10b17a22be
@@ -1,3 +1,4 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import codecs
|
||||
@@ -7,7 +8,9 @@ import locale
|
||||
|
||||
from django.utils.functional import Promise
|
||||
from django.utils import six
|
||||
from django.utils.six.moves.urllib.parse import quote
|
||||
from django.utils.six.moves.urllib.parse import quote, unquote
|
||||
if six.PY3:
|
||||
from urllib.parse import unquote_to_bytes
|
||||
|
||||
|
||||
class DjangoUnicodeDecodeError(UnicodeDecodeError):
|
||||
@@ -185,7 +188,9 @@ def iri_to_uri(iri):
|
||||
assuming input is either UTF-8 or unicode already, we can simplify things a
|
||||
little from the full method.
|
||||
|
||||
Returns an ASCII string containing the encoded result.
|
||||
Takes an IRI in UTF-8 bytes (e.g. '/I \xe2\x99\xa5 Django/') or unicode
|
||||
(e.g. '/I ♥ Django/') and returns ASCII bytes containing the encoded result
|
||||
(e.g. '/I%20%E2%99%A5%20Django/').
|
||||
"""
|
||||
# The list of safe characters here is constructed from the "reserved" and
|
||||
# "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
|
||||
@@ -204,6 +209,38 @@ def iri_to_uri(iri):
|
||||
return quote(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
|
||||
|
||||
|
||||
def uri_to_iri(uri):
    """
    Convert a Uniform Resource Identifier (URI) into an Internationalized
    Resource Identifier (IRI).

    This follows the approach of section 3.2 of RFC 3987: percent-escapes are
    turned back into octets, and any octets that do not form valid UTF-8 are
    percent-encoded again before the result is decoded.

    Takes a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
    a unicode string containing the decoded result (e.g. '/I ♥ Django/').
    ``None`` is returned unchanged.

    NOTE(review): every percent-escape is unquoted here, including escapes of
    reserved characters (e.g. '%2F' becomes '/'); RFC 3987 section 3.2 keeps
    those escaped -- confirm whether callers rely on the current behavior.
    """
    if uri is None:
        return uri
    raw = force_bytes(uri)
    # Python 3's unquote() returns text; unquote_to_bytes() keeps the raw
    # octets so invalid UTF-8 can still be detected afterwards. On Python 2,
    # unquote() of a bytestring already yields a bytestring.
    if six.PY3:
        octets = unquote_to_bytes(raw)
    else:
        octets = unquote(raw)
    return repercent_broken_unicode(octets).decode('utf-8')
|
||||
|
||||
|
||||
def repercent_broken_unicode(path):
    """
    As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
    re-percent-encode any octet produced that is not part of a strictly legal
    UTF-8 octet sequence.

    Takes a bytestring and returns a bytestring in which every octet sequence
    that failed to decode as UTF-8 has been replaced by its percent-encoded
    form; valid sequences are left untouched.
    """
    fixed_parts = []
    while True:
        try:
            path.decode('utf-8')
        except UnicodeDecodeError as e:
            # Loop instead of recursing: one stack frame per invalid sequence
            # lets input with many broken octets exhaust the recursion limit
            # (and a lot of memory while the exceptions are being handled).
            repercent = quote(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
            # quote() only ever produces ASCII, so this encode cannot fail.
            fixed_parts.append(path[:e.start] + repercent.encode('ascii'))
            # Everything up to e.end is now known to be clean; only the
            # remainder still needs checking.
            path = path[e.end:]
        else:
            return b"".join(fixed_parts) + path
|
||||
|
||||
|
||||
def filepath_to_uri(path):
|
||||
"""Convert a file system path to a URI portion that is suitable for
|
||||
inclusion in a URL.
|
||||
|
||||
Reference in New Issue
Block a user