1
0
mirror of https://github.com/django/django.git synced 2025-10-09 14:59:24 +00:00

Fixed #36520 -- Reverted "Fixed #35440 -- Simplified parse_header_parameters by leveraging stdlib's Message."

This partially reverts commit 9aabe7eae3eeb3e64c5a0f3687118cd806158550.

The simplification of parse_header_parameters using stdlib's Message
is reverted due to a performance regression. The check for the header
maximum length remains in place, per Security Team guidance.

Thanks to David Smith for reporting the regression, and Jacob Walls for
the review.
This commit is contained in:
Natalia 2025-09-15 11:45:20 -03:00 committed by nessita
parent 0e0b4214c3
commit 424e0d8697
2 changed files with 42 additions and 18 deletions

View File

@ -3,9 +3,8 @@ import re
import unicodedata
from binascii import Error as BinasciiError
from datetime import UTC, datetime
from email.message import Message
from email.utils import collapse_rfc2231_value, formatdate
from urllib.parse import quote
from email.utils import formatdate
from urllib.parse import quote, unquote
from urllib.parse import urlencode as original_urlencode
from urllib.parse import urlsplit
@ -316,6 +315,19 @@ def escape_leading_slashes(url):
return url
def _parseparam(s):
while s[:1] == ";":
s = s[1:]
end = s.find(";")
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(";", end + 1)
if end < 0:
end = len(s)
f = s[:end]
yield f.strip()
s = s[end:]
def parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
"""
Parse a Content-type like header.
@ -323,21 +335,34 @@ def parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
If `line` is longer than `max_length`, `ValueError` is raised.
"""
if max_length is not None and line and len(line) > max_length:
if not line:
return "", {}
if max_length is not None and len(line) > max_length:
raise ValueError("Unable to parse header parameters (value too long).")
m = Message()
m["content-type"] = line
params = m.get_params()
parts = _parseparam(";" + line)
key = parts.__next__().lower()
pdict = {}
key = params.pop(0)[0].lower()
for name, value in params:
if not name:
continue
if isinstance(value, tuple):
value = collapse_rfc2231_value(value)
pdict[name] = value
for p in parts:
i = p.find("=")
if i >= 0:
has_encoding = False
name = p[:i].strip().lower()
if name.endswith("*"):
# Embedded lang/encoding, like "filename*=UTF-8''file.ext".
# https://tools.ietf.org/html/rfc2231#section-4
name = name[:-1]
if p.count("'") == 2:
has_encoding = True
value = p[i + 1 :].strip()
if len(value) >= 2 and value[0] == value[-1] == '"':
value = value[1:-1]
value = value.replace("\\\\", "\\").replace('\\"', '"')
if has_encoding:
encoding, lang, value = value.split("'")
value = unquote(value, encoding=encoding)
pdict[name] = value
return key, pdict

View File

@ -442,7 +442,7 @@ class ParseHeaderParameterTests(unittest.TestCase):
def test_basic(self):
tests = [
("", ("", {})),
(None, ("none", {})),
(None, ("", {})),
("text/plain", ("text/plain", {})),
("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
@ -507,13 +507,12 @@ class ParseHeaderParameterTests(unittest.TestCase):
"""
Test wrongly formatted RFC 2231 headers (missing double single quotes).
Parsing should not crash (#24209).
But stdlib email still decodes (#35440).
"""
test_data = (
(
"Content-Type: application/x-stuff; "
"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
"'This is ***fun***",
"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
),
("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),