1
0
mirror of https://github.com/django/django.git synced 2025-10-25 06:36:07 +00:00

Fixed #26005 -- Fixed some percent decoding cases in uri_to_iri().

This commit is contained in:
Chronial
2017-02-07 14:55:44 +01:00
committed by Tim Graham
parent 500532c95d
commit 03281d8fe7
4 changed files with 55 additions and 17 deletions

View File

@@ -2,7 +2,7 @@ import codecs
import datetime
import locale
from decimal import Decimal
from urllib.parse import quote, unquote_to_bytes
from urllib.parse import quote
from django.utils import six
from django.utils.functional import Promise
@@ -151,20 +151,57 @@ def iri_to_uri(iri):
return quote(iri, safe="/#%[]=:;$&()+,!?*@'~")
# Lookup table used by uri_to_iri(): maps the two hex digits that follow a
# '%' (as bytes, e.g. b'41') to the single byte they decode to. Only the
# sequences listed here are decoded; every other percent escape is left as is.
#
# First, the unreserved characters from RFC 3986 section 2.3:
# "-", ".", "_", "~", DIGIT and ALPHA.
_ascii_ranges = [
    [45, 46, 95, 126],  # - . _ ~
    range(48, 58),      # 0-9
    range(65, 91),      # A-Z
    range(97, 123),     # a-z
]
_hextobyte = {
    # Register both the lower-case and upper-case spelling of each hex code.
    (fmt % char).encode(): bytes((char,))
    for ascii_range in _ascii_ranges
    for char in ascii_range
    for fmt in ['%02x', '%02X']
}
# And then every byte value from 128 up, because bytes ≥ 128 are part of
# multibyte unicode characters. _hexdig[8:] holds the high nibbles 8-F (in
# both cases); pairing them with every low nibble also covers mixed-case
# spellings such as 'eF'.
_hexdig = '0123456789ABCDEFabcdef'
_hextobyte.update({
    (a + b).encode(): bytes.fromhex(a + b)
    for a in _hexdig[8:] for b in _hexdig
})
def uri_to_iri(uri):
    """
    Convert a Uniform Resource Identifier (URI) into an Internationalized
    Resource Identifier (IRI).

    This is the algorithm from section 3.2 of RFC 3987, excluding step 4.

    Take a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and return
    a string containing the encoded result (e.g. '/I%20♥%20Django/').
    Only the percent escapes listed in the module-level ``_hextobyte`` table
    are decoded; all others are kept verbatim. ``None`` is returned unchanged.
    """
    if uri is None:
        return uri
    uri = force_bytes(uri)
    # Fast selective unquote: First, split on '%' and then starting with the
    # second block, decode the first 2 bytes if they represent a hex code to
    # decode. The rest of the block is the part after '%AB', not containing
    # any '%'. Add that to the output without further processing.
    bits = uri.split(b'%')
    if len(bits) == 1:
        # No '%' at all: nothing to decode.
        iri = uri
    else:
        parts = [bits[0]]
        # Bind the bound method and the table to locals for the loop.
        append = parts.append
        hextobyte = _hextobyte
        for item in bits[1:]:
            code = item[:2]
            if code in hextobyte:
                append(hextobyte[code])
                append(item[2:])
            else:
                # Not a sequence we decode: restore the '%' removed by the
                # split and keep the chunk untouched.
                append(b'%')
                append(item)
        iri = b''.join(parts)
    # NOTE(review): repercent_broken_unicode() is defined elsewhere; judging
    # by its name it re-escapes bytes that are not valid UTF-8 so the final
    # decode() cannot fail -- confirm against its definition.
    return repercent_broken_unicode(iri).decode()