mirror of
				https://github.com/django/django.git
				synced 2025-10-25 22:56:12 +00:00 
			
		
		
		
	[1.8.x] Fixed #24240 -- Allowed GZipping a Unicode StreamingHttpResponse
make_bytes() assumed that if the Content-Encoding header is set, then
everything had already been dealt with bytes-wise, but in a streaming
situation this was not necessarily the case.
When working with a StreamingHttpResponse iterable, make_bytes() is only
called when necessary, but by that point the middleware has already
added the Content-Encoding header, and thus make_bytes() tries to call
bytes(value) (and dies). If it had been a normal HttpResponse,
make_bytes() would have been called when the content was set, well
before the middleware set the Content-Encoding header.
This commit removes the special casing when Content-Encoding is set,
allowing unicode strings to be encoded during the iteration before they
are e.g. gzipped. This behaviour was added a long time ago for #4969 and
it doesn't appear to be necessary any more, as everything is correctly
made into bytes at the appropriate places.
Adds two new tests to show that supplying non-ASCII characters to a
StreamingHttpResponse works fine both normally and when passed through
the GZip middleware (the latter dies without the change to make_bytes()).
Removes the test with a nonsense Content-Encoding and Unicode input - if
this were to happen, the input could still be encoded as bytes fine.
Backport of 250aa7c39b from master.
			
			
This commit is contained in:
		
				
					committed by
					
						 Claude Paroz
						Claude Paroz
					
				
			
			
				
	
			
			
			
						parent
						
							43b0131fb5
						
					
				
				
					commit
					d88c24f436
				
			
							
								
								
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -452,6 +452,7 @@ answer newbie questions, and generally made Django that much better: | ||||
|     Matt Deacalion Stevens <matt@dirtymonkey.co.uk> | ||||
|     Matt Dennenbaum | ||||
|     Matthew Flanagan <http://wadofstuff.blogspot.com> | ||||
|     Matthew Somerville <matthew-github@dracos.co.uk> | ||||
|     Matthew Tretter <m@tthewwithanm.com> | ||||
|     Matthias Kestenholz <mk@406.ch> | ||||
|     Matthias Pronk <django@masida.nl> | ||||
|   | ||||
| @@ -282,10 +282,6 @@ class HttpResponseBase(six.Iterator): | ||||
|         # an instance of a subclass, this function returns `bytes(value)`. | ||||
|         # This doesn't make a copy when `value` already contains bytes. | ||||
|  | ||||
|         # If content is already encoded (eg. gzip), assume bytes. | ||||
|         if self.has_header('Content-Encoding'): | ||||
|             return bytes(value) | ||||
|  | ||||
|         # Handle string types -- we can't rely on force_bytes here because: | ||||
|         # - under Python 3 it attempts str conversion first | ||||
|         # - when self._charset != 'utf-8' it re-encodes the content | ||||
|   | ||||
| @@ -348,14 +348,6 @@ class HttpResponseTests(unittest.TestCase): | ||||
|         #'\xde\x9e' == unichr(1950).encode('utf-8') | ||||
|         self.assertEqual(r.content, b'123\xde\x9e') | ||||
|  | ||||
|         # with Content-Encoding header | ||||
|         r = HttpResponse() | ||||
|         r['Content-Encoding'] = 'winning' | ||||
|         r.content = [b'abc', b'def'] | ||||
|         self.assertEqual(r.content, b'abcdef') | ||||
|         self.assertRaises(TypeError if six.PY3 else UnicodeEncodeError, | ||||
|                           setattr, r, 'content', ['\u079e']) | ||||
|  | ||||
|         # .content can safely be accessed multiple times. | ||||
|         r = HttpResponse(iter(['hello', 'world'])) | ||||
|         self.assertEqual(r.content, r.content) | ||||
| @@ -512,6 +504,14 @@ class StreamingHttpResponseTests(TestCase): | ||||
|         self.assertEqual(list(r), [b'abc', b'def']) | ||||
|         self.assertEqual(list(r), []) | ||||
|  | ||||
|         # iterating over Unicode strings still yields bytestring chunks. | ||||
|         r.streaming_content = iter(['hello', 'café']) | ||||
|         chunks = list(r) | ||||
|         # '\xc3\xa9' == unichr(233).encode('utf-8') | ||||
|         self.assertEqual(chunks, [b'hello', b'caf\xc3\xa9']) | ||||
|         for chunk in chunks: | ||||
|             self.assertIsInstance(chunk, six.binary_type) | ||||
|  | ||||
|         # streaming responses don't have a `content` attribute. | ||||
|         self.assertFalse(hasattr(r, 'content')) | ||||
|  | ||||
|   | ||||
| @@ -590,6 +590,7 @@ class GZipMiddlewareTest(TestCase): | ||||
|     compressible_string = b'a' * 500 | ||||
|     uncompressible_string = b''.join(six.int2byte(random.randint(0, 255)) for _ in range(500)) | ||||
|     sequence = [b'a' * 500, b'b' * 200, b'a' * 300] | ||||
|     sequence_unicode = ['a' * 500, 'é' * 200, 'a' * 300] | ||||
|  | ||||
|     def setUp(self): | ||||
|         self.req = RequestFactory().get('/') | ||||
| @@ -601,6 +602,8 @@ class GZipMiddlewareTest(TestCase): | ||||
|         self.resp['Content-Type'] = 'text/html; charset=UTF-8' | ||||
|         self.stream_resp = StreamingHttpResponse(self.sequence) | ||||
|         self.stream_resp['Content-Type'] = 'text/html; charset=UTF-8' | ||||
|         self.stream_resp_unicode = StreamingHttpResponse(self.sequence_unicode) | ||||
|         self.stream_resp_unicode['Content-Type'] = 'text/html; charset=UTF-8' | ||||
|  | ||||
|     @staticmethod | ||||
|     def decompress(gzipped_string): | ||||
| @@ -624,6 +627,15 @@ class GZipMiddlewareTest(TestCase): | ||||
|         self.assertEqual(r.get('Content-Encoding'), 'gzip') | ||||
|         self.assertFalse(r.has_header('Content-Length')) | ||||
|  | ||||
|     def test_compress_streaming_response_unicode(self): | ||||
|         """ | ||||
|         Tests that compression is performed on responses with streaming Unicode content. | ||||
|         """ | ||||
|         r = GZipMiddleware().process_response(self.req, self.stream_resp_unicode) | ||||
|         self.assertEqual(self.decompress(b''.join(r)), b''.join(x.encode('utf-8') for x in self.sequence_unicode)) | ||||
|         self.assertEqual(r.get('Content-Encoding'), 'gzip') | ||||
|         self.assertFalse(r.has_header('Content-Length')) | ||||
|  | ||||
|     def test_compress_file_response(self): | ||||
|         """ | ||||
|         Tests that compression is performed on FileResponse. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user