mirror of
https://github.com/django/django.git
synced 2025-10-25 06:36:07 +00:00
Fixed #26005 -- Fixed some percent decoding cases in uri_to_iri().
This commit is contained in:
@@ -2,7 +2,7 @@ import codecs
|
||||
import datetime
|
||||
import locale
|
||||
from decimal import Decimal
|
||||
from urllib.parse import quote, unquote_to_bytes
|
||||
from urllib.parse import quote
|
||||
|
||||
from django.utils import six
|
||||
from django.utils.functional import Promise
|
||||
@@ -151,20 +151,57 @@ def iri_to_uri(iri):
|
||||
return quote(iri, safe="/#%[]=:;$&()+,!?*@'~")
|
||||
|
||||
|
||||
# List of byte values that uri_to_iri() decodes from percent encoding.
|
||||
# First, the unreserved characters from RFC 3986:
|
||||
_ascii_ranges = [[45, 46, 95, 126], range(65, 91), range(97, 123)]
|
||||
_hextobyte = {
|
||||
(fmt % char).encode(): bytes((char,))
|
||||
for ascii_range in _ascii_ranges
|
||||
for char in ascii_range
|
||||
for fmt in ['%02x', '%02X']
|
||||
}
|
||||
# And then everything above 128, because bytes ≥ 128 are part of multibyte
|
||||
# unicode characters.
|
||||
_hexdig = '0123456789ABCDEFabcdef'
|
||||
_hextobyte.update({
|
||||
(a + b).encode(): bytes.fromhex(a + b)
|
||||
for a in _hexdig[8:] for b in _hexdig
|
||||
})
|
||||
|
||||
|
||||
def uri_to_iri(uri):
|
||||
"""
|
||||
Converts a Uniform Resource Identifier(URI) into an Internationalized
|
||||
Resource Identifier(IRI).
|
||||
|
||||
This is the algorithm from section 3.2 of RFC 3987.
|
||||
This is the algorithm from section 3.2 of RFC 3987, excluding step 4.
|
||||
|
||||
Takes an URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and returns
|
||||
a string containing the encoded result (e.g. '/I \xe2\x99\xa5 Django/').
|
||||
a string containing the encoded result (e.g. '/I%20♥%20Django/').
|
||||
"""
|
||||
if uri is None:
|
||||
return uri
|
||||
uri = force_bytes(uri)
|
||||
iri = unquote_to_bytes(uri)
|
||||
# Fast selective unqote: First, split on '%' and then starting with the
|
||||
# second block, decode the first 2 bytes if they represent a hex code to
|
||||
# decode. The rest of the block is the part after '%AB', not containing
|
||||
# any '%'. Add that to the output without further processing.
|
||||
bits = uri.split(b'%')
|
||||
if len(bits) == 1:
|
||||
iri = uri
|
||||
else:
|
||||
parts = [bits[0]]
|
||||
append = parts.append
|
||||
hextobyte = _hextobyte
|
||||
for item in bits[1:]:
|
||||
hex = item[:2]
|
||||
if hex in hextobyte:
|
||||
append(hextobyte[item[:2]])
|
||||
append(item[2:])
|
||||
else:
|
||||
append(b'%')
|
||||
append(item)
|
||||
iri = b''.join(parts)
|
||||
return repercent_broken_unicode(iri).decode()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user