Mirror of https://github.com/django/django.git (synced 2025-10-31 09:41:08 +00:00)
			
		
		
		
	Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.
This also removes unused parse_header() and _parse_header_params() helpers in django.http.multipartparser.
This commit is contained in:
		
				
					committed by
					
						Mariusz Felisiak
					
				
			
			
				
	
			
			
			
						parent
						
							bff5c114be
						
					
				
				
					commit
					d4d5427571
				
			| @@ -8,7 +8,6 @@ import base64 | ||||
| import binascii | ||||
| import collections | ||||
| import html | ||||
| from urllib.parse import unquote | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core.exceptions import ( | ||||
| @@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size): | ||||
|         # This terminology ("main value" and "dictionary of | ||||
|         # parameters") is from the Python docs. | ||||
|         try: | ||||
|             main_value_pair, params = parse_header(line) | ||||
|             main_value_pair, params = parse_header_parameters(line.decode()) | ||||
|             name, value = main_value_pair.split(":", 1) | ||||
|             params = {k: v.encode() for k, v in params.items()} | ||||
|         except ValueError:  # Invalid header. | ||||
|             continue | ||||
|  | ||||
| @@ -703,50 +703,3 @@ class Parser: | ||||
|         for sub_stream in boundarystream: | ||||
|             # Iterate over each part | ||||
|             yield parse_boundary_stream(sub_stream, 1024) | ||||
|  | ||||
|  | ||||
| def parse_header(line): | ||||
|     """ | ||||
|     Parse the header into a key-value. | ||||
|  | ||||
|     Input (line): bytes, output: str for key/name, bytes for values which | ||||
|     will be decoded later. | ||||
|     """ | ||||
|     plist = _parse_header_params(b";" + line) | ||||
|     key = plist.pop(0).lower().decode("ascii") | ||||
|     pdict = {} | ||||
|     for p in plist: | ||||
|         i = p.find(b"=") | ||||
|         if i >= 0: | ||||
|             has_encoding = False | ||||
|             name = p[:i].strip().lower().decode("ascii") | ||||
|             if name.endswith("*"): | ||||
|                 # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext") | ||||
|                 # https://tools.ietf.org/html/rfc2231#section-4 | ||||
|                 name = name[:-1] | ||||
|                 if p.count(b"'") == 2: | ||||
|                     has_encoding = True | ||||
|             value = p[i + 1 :].strip() | ||||
|             if len(value) >= 2 and value[:1] == value[-1:] == b'"': | ||||
|                 value = value[1:-1] | ||||
|                 value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"') | ||||
|             if has_encoding: | ||||
|                 encoding, lang, value = value.split(b"'") | ||||
|                 value = unquote(value.decode(), encoding=encoding.decode()) | ||||
|             pdict[name] = value | ||||
|     return key, pdict | ||||
|  | ||||
|  | ||||
| def _parse_header_params(s): | ||||
|     plist = [] | ||||
|     while s[:1] == b";": | ||||
|         s = s[1:] | ||||
|         end = s.find(b";") | ||||
|         while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: | ||||
|             end = s.find(b";", end + 1) | ||||
|         if end < 0: | ||||
|             end = len(s) | ||||
|         f = s[:end] | ||||
|         plist.append(f.strip()) | ||||
|         s = s[end:] | ||||
|     return plist | ||||
|   | ||||
| @@ -11,6 +11,7 @@ from urllib.parse import ( | ||||
|     _splitnetloc, | ||||
|     _splitparams, | ||||
|     scheme_chars, | ||||
|     unquote, | ||||
| ) | ||||
| from urllib.parse import urlencode as original_urlencode | ||||
| from urllib.parse import uses_params | ||||
| @@ -387,15 +388,25 @@ def parse_header_parameters(line): | ||||
|     Return the main content-type and a dictionary of options. | ||||
|     """ | ||||
|     parts = _parseparam(";" + line) | ||||
|     key = parts.__next__() | ||||
|     key = parts.__next__().lower() | ||||
|     pdict = {} | ||||
|     for p in parts: | ||||
|         i = p.find("=") | ||||
|         if i >= 0: | ||||
|             has_encoding = False | ||||
|             name = p[:i].strip().lower() | ||||
|             if name.endswith("*"): | ||||
|                 # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext") | ||||
|                 # https://tools.ietf.org/html/rfc2231#section-4 | ||||
|                 name = name[:-1] | ||||
|                 if p.count("'") == 2: | ||||
|                     has_encoding = True | ||||
|             value = p[i + 1 :].strip() | ||||
|             if len(value) >= 2 and value[0] == value[-1] == '"': | ||||
|                 value = value[1:-1] | ||||
|                 value = value.replace("\\\\", "\\").replace('\\"', '"') | ||||
|             if has_encoding: | ||||
|                 encoding, lang, value = value.split("'") | ||||
|                 value = unquote(value, encoding=encoding) | ||||
|             pdict[name] = value | ||||
|     return key, pdict | ||||
|   | ||||
| @@ -17,7 +17,6 @@ from django.http.multipartparser import ( | ||||
|     MultiPartParser, | ||||
|     MultiPartParserError, | ||||
|     Parser, | ||||
|     parse_header, | ||||
| ) | ||||
| from django.test import SimpleTestCase, TestCase, client, override_settings | ||||
|  | ||||
| @@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase): | ||||
|         for file_name in CANDIDATE_INVALID_FILE_NAMES: | ||||
|             with self.subTest(file_name=file_name): | ||||
|                 self.assertIsNone(parser.sanitize_file_name(file_name)) | ||||
|  | ||||
|     def test_rfc2231_parsing(self): | ||||
|         test_data = ( | ||||
|             ( | ||||
|                 b"Content-Type: application/x-stuff; " | ||||
|                 b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A", | ||||
|                 "This is ***fun***", | ||||
|             ), | ||||
|             ( | ||||
|                 b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html", | ||||
|                 "foo-ä.html", | ||||
|             ), | ||||
|             ( | ||||
|                 b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html", | ||||
|                 "foo-ä.html", | ||||
|             ), | ||||
|         ) | ||||
|         for raw_line, expected_title in test_data: | ||||
|             parsed = parse_header(raw_line) | ||||
|             self.assertEqual(parsed[1]["title"], expected_title) | ||||
|  | ||||
|     def test_rfc2231_wrong_title(self): | ||||
|         """ | ||||
|         Test wrongly formatted RFC 2231 headers (missing double single quotes). | ||||
|         Parsing should not crash (#24209). | ||||
|         """ | ||||
|         test_data = ( | ||||
|             ( | ||||
|                 b"Content-Type: application/x-stuff; " | ||||
|                 b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A", | ||||
|                 b"'This%20is%20%2A%2A%2Afun%2A%2A%2A", | ||||
|             ), | ||||
|             (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"), | ||||
|             (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"), | ||||
|         ) | ||||
|         for raw_line, expected_title in test_data: | ||||
|             parsed = parse_header(raw_line) | ||||
|             self.assertEqual(parsed[1]["title"], expected_title) | ||||
|  | ||||
|     def test_parse_header_with_double_quotes_and_semicolon(self): | ||||
|         self.assertEqual( | ||||
|             parse_header(b'form-data; name="files"; filename="fo\\"o;bar"'), | ||||
|             ("form-data", {"name": b"files", "filename": b'fo"o;bar'}), | ||||
|         ) | ||||
|   | ||||
| @@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase): | ||||
|         for header, expected in tests: | ||||
|             with self.subTest(header=header): | ||||
|                 self.assertEqual(parse_header_parameters(header), expected) | ||||
|  | ||||
|     def test_rfc2231_parsing(self): | ||||
|         test_data = ( | ||||
|             ( | ||||
|                 "Content-Type: application/x-stuff; " | ||||
|                 "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A", | ||||
|                 "This is ***fun***", | ||||
|             ), | ||||
|             ( | ||||
|                 "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html", | ||||
|                 "foo-ä.html", | ||||
|             ), | ||||
|             ( | ||||
|                 "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html", | ||||
|                 "foo-ä.html", | ||||
|             ), | ||||
|         ) | ||||
|         for raw_line, expected_title in test_data: | ||||
|             parsed = parse_header_parameters(raw_line) | ||||
|             self.assertEqual(parsed[1]["title"], expected_title) | ||||
|  | ||||
|     def test_rfc2231_wrong_title(self): | ||||
|         """ | ||||
|         Test wrongly formatted RFC 2231 headers (missing double single quotes). | ||||
|         Parsing should not crash (#24209). | ||||
|         """ | ||||
|         test_data = ( | ||||
|             ( | ||||
|                 "Content-Type: application/x-stuff; " | ||||
|                 "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A", | ||||
|                 "'This%20is%20%2A%2A%2Afun%2A%2A%2A", | ||||
|             ), | ||||
|             ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"), | ||||
|             ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"), | ||||
|         ) | ||||
|         for raw_line, expected_title in test_data: | ||||
|             parsed = parse_header_parameters(raw_line) | ||||
|             self.assertEqual(parsed[1]["title"], expected_title) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user