mirror of
				https://github.com/django/django.git
				synced 2025-10-24 22:26:08 +00:00 
			
		
		
		
	Fixed #9886 -- Added a file-like interface to HttpRequest. Thanks to Ivan Sagalaev for the suggestion and patch.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@14394 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
		| @@ -42,6 +42,8 @@ class ModPythonRequest(http.HttpRequest): | ||||
|             # naughty, but also pretty harmless. | ||||
|             self.path_info = u'/' | ||||
|         self._post_parse_error = False | ||||
|         self._stream = self._req | ||||
|         self._read_started = False | ||||
|  | ||||
|     def __repr__(self): | ||||
|         # Since this is called as part of error handling, we need to be very | ||||
| @@ -81,26 +83,6 @@ class ModPythonRequest(http.HttpRequest): | ||||
|             # mod_python < 3.2.10 doesn't have req.is_https(). | ||||
|             return self._req.subprocess_env.get('HTTPS', '').lower() in ('on', '1') | ||||
|  | ||||
|     def _load_post_and_files(self): | ||||
|         "Populates self._post and self._files" | ||||
|         if self.method != 'POST': | ||||
|             self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict() | ||||
|             return | ||||
|  | ||||
|         if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'): | ||||
|             self._raw_post_data = '' | ||||
|             try: | ||||
|                 self._post, self._files = self.parse_file_upload(self.META, self._req) | ||||
|             except: | ||||
|                 # See django.core.handlers.wsgi.WSGIHandler for an explanation | ||||
|                 # of what's going on here. | ||||
|                 self._post = http.QueryDict('') | ||||
|                 self._files = datastructures.MultiValueDict() | ||||
|                 self._post_parse_error = True | ||||
|                 raise | ||||
|         else: | ||||
|             self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() | ||||
|  | ||||
|     def _get_request(self): | ||||
|         if not hasattr(self, '_request'): | ||||
|             self._request = datastructures.MergeDict(self.POST, self.GET) | ||||
| @@ -162,13 +144,6 @@ class ModPythonRequest(http.HttpRequest): | ||||
|                 self._meta[key] = value | ||||
|         return self._meta | ||||
|  | ||||
|     def _get_raw_post_data(self): | ||||
|         try: | ||||
|             return self._raw_post_data | ||||
|         except AttributeError: | ||||
|             self._raw_post_data = self._req.read() | ||||
|             return self._raw_post_data | ||||
|  | ||||
|     def _get_method(self): | ||||
|         return self.META['REQUEST_METHOD'].upper() | ||||
|  | ||||
| @@ -178,7 +153,6 @@ class ModPythonRequest(http.HttpRequest): | ||||
|     FILES = property(_get_files) | ||||
|     META = property(_get_meta) | ||||
|     REQUEST = property(_get_request) | ||||
|     raw_post_data = property(_get_raw_post_data) | ||||
|     method = property(_get_method) | ||||
|  | ||||
| class ModPythonHandler(BaseHandler): | ||||
|   | ||||
| @@ -5,6 +5,7 @@ try: | ||||
|     from cStringIO import StringIO | ||||
| except ImportError: | ||||
|     from StringIO import StringIO | ||||
| import socket | ||||
|  | ||||
| from django import http | ||||
| from django.core import signals | ||||
| @@ -62,20 +63,55 @@ STATUS_CODE_TEXT = { | ||||
|     505: 'HTTP VERSION NOT SUPPORTED', | ||||
| } | ||||
|  | ||||
| def safe_copyfileobj(fsrc, fdst, length=16*1024, size=0): | ||||
|     """ | ||||
|     A version of shutil.copyfileobj that will not read more than 'size' bytes. | ||||
|     This makes it safe from clients sending more than CONTENT_LENGTH bytes of | ||||
|     data in the body. | ||||
|     """ | ||||
|     if not size: | ||||
|         return | ||||
|     while size > 0: | ||||
|         buf = fsrc.read(min(length, size)) | ||||
|         if not buf: | ||||
| class LimitedStream(object): | ||||
|     ''' | ||||
|     LimitedStream wraps another stream so that no more than a specified | ||||
|     number of bytes can be read from it. | ||||
|     ''' | ||||
|     def __init__(self, stream, limit, buf_size=64 * 1024 * 1024): | ||||
|         self.stream = stream | ||||
|         self.remaining = limit | ||||
|         self.buffer = '' | ||||
|         self.buf_size = buf_size | ||||
|  | ||||
|     def _read_limited(self, size=None): | ||||
|         if size is None or size > self.remaining: | ||||
|             size = self.remaining | ||||
|         if size == 0: | ||||
|             return '' | ||||
|         result = self.stream.read(size) | ||||
|         self.remaining -= len(result) | ||||
|         return result | ||||
|  | ||||
|     def read(self, size=None): | ||||
|         if size is None: | ||||
|             result = self.buffer + self._read_limited() | ||||
|             self.buffer = '' | ||||
|         elif size < len(self.buffer): | ||||
|             result = self.buffer[:size] | ||||
|             self.buffer = self.buffer[size:] | ||||
|         else: # size >= len(self.buffer) | ||||
|             result = self.buffer + self._read_limited(size - len(self.buffer)) | ||||
|             self.buffer = '' | ||||
|         return result | ||||
|  | ||||
|     def readline(self, size=None): | ||||
|         while '\n' not in self.buffer or \ | ||||
|               (size is not None and len(self.buffer) < size): | ||||
|             if size: | ||||
|                 chunk = self._read_limited(size - len(self.buffer)) | ||||
|             else: | ||||
|                 chunk = self._read_limited() | ||||
|             if not chunk: | ||||
|                 break | ||||
|         fdst.write(buf) | ||||
|         size -= len(buf) | ||||
|             self.buffer += chunk | ||||
|         sio = StringIO(self.buffer) | ||||
|         if size: | ||||
|             line = sio.readline(size) | ||||
|         else: | ||||
|             line = sio.readline() | ||||
|         self.buffer = sio.read() | ||||
|         return line | ||||
|  | ||||
| class WSGIRequest(http.HttpRequest): | ||||
|     def __init__(self, environ): | ||||
| @@ -98,6 +134,24 @@ class WSGIRequest(http.HttpRequest): | ||||
|         self.META['SCRIPT_NAME'] = script_name | ||||
|         self.method = environ['REQUEST_METHOD'].upper() | ||||
|         self._post_parse_error = False | ||||
|         if isinstance(self.environ['wsgi.input'], socket._fileobject): | ||||
|             # Under development server 'wsgi.input' is an instance of | ||||
|             # socket._fileobject which hangs indefinitely on reading bytes past | ||||
|             # available count. To prevent this it's wrapped in LimitedStream | ||||
|             # that doesn't read past Content-Length bytes. | ||||
|             # | ||||
|             # This is not done for other kinds of inputs (like flup's FastCGI | ||||
|             # streams) because they don't suffer from this problem and we can | ||||
|             # avoid using another wrapper with its own .read and .readline | ||||
|             # implementation. | ||||
|             try: | ||||
|                 content_length = int(self.environ.get('CONTENT_LENGTH', 0)) | ||||
|             except (ValueError, TypeError): | ||||
|                 content_length = 0 | ||||
|             self._stream = LimitedStream(self.environ['wsgi.input'], content_length) | ||||
|         else: | ||||
|             self._stream = self.environ['wsgi.input'] | ||||
|         self._read_started = False | ||||
|  | ||||
|     def __repr__(self): | ||||
|         # Since this is called as part of error handling, we need to be very | ||||
| @@ -133,30 +187,6 @@ class WSGIRequest(http.HttpRequest): | ||||
|         return 'wsgi.url_scheme' in self.environ \ | ||||
|             and self.environ['wsgi.url_scheme'] == 'https' | ||||
|  | ||||
|     def _load_post_and_files(self): | ||||
|         # Populates self._post and self._files | ||||
|         if self.method == 'POST': | ||||
|             if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): | ||||
|                 self._raw_post_data = '' | ||||
|                 try: | ||||
|                     self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input']) | ||||
|                 except: | ||||
|                     # An error occurred while parsing POST data.  Since when | ||||
|                     # formatting the error the request handler might access | ||||
|                     # self.POST, set self._post and self._file to prevent | ||||
|                     # attempts to parse POST data again. | ||||
|                     self._post = http.QueryDict('') | ||||
|                     self._files = datastructures.MultiValueDict() | ||||
|                     # Mark that an error occurred.  This allows self.__repr__ to | ||||
|                     # be explicit about it instead of simply representing an | ||||
|                     # empty POST | ||||
|                     self._post_parse_error = True | ||||
|                     raise | ||||
|             else: | ||||
|                 self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict() | ||||
|         else: | ||||
|             self._post, self._files = http.QueryDict('', encoding=self._encoding), datastructures.MultiValueDict() | ||||
|  | ||||
|     def _get_request(self): | ||||
|         if not hasattr(self, '_request'): | ||||
|             self._request = datastructures.MergeDict(self.POST, self.GET) | ||||
| @@ -192,32 +222,11 @@ class WSGIRequest(http.HttpRequest): | ||||
|             self._load_post_and_files() | ||||
|         return self._files | ||||
|  | ||||
|     def _get_raw_post_data(self): | ||||
|         try: | ||||
|             return self._raw_post_data | ||||
|         except AttributeError: | ||||
|             buf = StringIO() | ||||
|             try: | ||||
|                 # CONTENT_LENGTH might be absent if POST doesn't have content at all (lighttpd) | ||||
|                 content_length = int(self.environ.get('CONTENT_LENGTH', 0)) | ||||
|             except (ValueError, TypeError): | ||||
|                 # If CONTENT_LENGTH was empty string or not an integer, don't | ||||
|                 # error out. We've also seen None passed in here (against all | ||||
|                 # specs, but see ticket #8259), so we handle TypeError as well. | ||||
|                 content_length = 0 | ||||
|             if content_length > 0: | ||||
|                 safe_copyfileobj(self.environ['wsgi.input'], buf, | ||||
|                         size=content_length) | ||||
|             self._raw_post_data = buf.getvalue() | ||||
|             buf.close() | ||||
|             return self._raw_post_data | ||||
|  | ||||
|     GET = property(_get_get, _set_get) | ||||
|     POST = property(_get_post, _set_post) | ||||
|     COOKIES = property(_get_cookies, _set_cookies) | ||||
|     FILES = property(_get_files) | ||||
|     REQUEST = property(_get_request) | ||||
|     raw_post_data = property(_get_raw_post_data) | ||||
|  | ||||
| class WSGIHandler(base.BaseHandler): | ||||
|     initLock = Lock() | ||||
|   | ||||
| @@ -6,6 +6,10 @@ from Cookie import BaseCookie, SimpleCookie, CookieError | ||||
| from pprint import pformat | ||||
| from urllib import urlencode | ||||
| from urlparse import urljoin | ||||
| try: | ||||
|     from cStringIO import StringIO | ||||
| except ImportError: | ||||
|     from StringIO import StringIO | ||||
| try: | ||||
|     # The mod_python version is more efficient, so try importing it first. | ||||
|     from mod_python.util import parse_qsl | ||||
| @@ -132,6 +136,73 @@ class HttpRequest(object): | ||||
|         parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding) | ||||
|         return parser.parse() | ||||
|  | ||||
|     def _get_raw_post_data(self): | ||||
|         if not hasattr(self, '_raw_post_data'): | ||||
|             if self._read_started: | ||||
|                 raise Exception("You cannot access raw_post_data after reading from request's data stream") | ||||
|             self._raw_post_data = self.read() | ||||
|             self._stream = StringIO(self._raw_post_data) | ||||
|         return self._raw_post_data | ||||
|     raw_post_data = property(_get_raw_post_data) | ||||
|  | ||||
|     def _mark_post_parse_error(self): | ||||
|         self._post = QueryDict('') | ||||
|         self._files = MultiValueDict() | ||||
|         self._post_parse_error = True | ||||
|  | ||||
|     def _load_post_and_files(self): | ||||
|         # Populates self._post and self._files | ||||
|         if self.method != 'POST': | ||||
|             self._post, self._files = QueryDict('', encoding=self._encoding), MultiValueDict() | ||||
|             return | ||||
|         if self._read_started: | ||||
|             self._mark_post_parse_error() | ||||
|             return | ||||
|  | ||||
|         if self.META.get('CONTENT_TYPE', '').startswith('multipart'): | ||||
|             self._raw_post_data = '' | ||||
|             try: | ||||
|                 self._post, self._files = self.parse_file_upload(self.META, self) | ||||
|             except: | ||||
|                 # An error occurred while parsing POST data.  Since when | ||||
|                 # formatting the error the request handler might access | ||||
|                 # self.POST, set self._post and self._file to prevent | ||||
|                 # attempts to parse POST data again. | ||||
|                 # Mark that an error occurred.  This allows self.__repr__ to | ||||
|                 # be explicit about it instead of simply representing an | ||||
|                 # empty POST | ||||
|                 self._mark_post_parse_error() | ||||
|                 raise | ||||
|         else: | ||||
|             self._post, self._files = QueryDict(self.raw_post_data, encoding=self._encoding), MultiValueDict() | ||||
|  | ||||
|     ## File-like and iterator interface. | ||||
|     ## | ||||
|     ## Expects self._stream to be set to an appropriate source of bytes by | ||||
|     ## a corresponding request subclass (WSGIRequest or ModPythonRequest). | ||||
|     ## Also when request data has already been read by request.POST or | ||||
|     ## request.raw_post_data, self._stream points to a StringIO instance | ||||
|     ## containing that data. | ||||
|  | ||||
|     def read(self, *args, **kwargs): | ||||
|         self._read_started = True | ||||
|         return self._stream.read(*args, **kwargs) | ||||
|  | ||||
|     def readline(self, *args, **kwargs): | ||||
|         self._read_started = True | ||||
|         return self._stream.readline(*args, **kwargs) | ||||
|  | ||||
|     def xreadlines(self): | ||||
|         while True: | ||||
|             buf = self.readline() | ||||
|             if not buf: | ||||
|                 break | ||||
|             yield buf | ||||
|     __iter__ = xreadlines | ||||
|  | ||||
|     def readlines(self): | ||||
|         return list(iter(self)) | ||||
|  | ||||
| class QueryDict(MultiValueDict): | ||||
|     """ | ||||
|     A specialized MultiValueDict that takes a query string when initialized. | ||||
|   | ||||
| @@ -189,8 +189,14 @@ All attributes except ``session`` should be considered read-only. | ||||
|  | ||||
| .. attribute:: HttpRequest.raw_post_data | ||||
|  | ||||
|     The raw HTTP POST data. This is only useful for advanced processing. Use | ||||
|     ``POST`` instead. | ||||
|     The raw HTTP POST data as a byte string. This is useful for processing | ||||
|     data in formats other than conventional HTML forms: binary images, | ||||
|     XML payloads, etc. For processing form data, use ``HttpRequest.POST``. | ||||
|  | ||||
|     .. versionadded:: 1.3 | ||||
|  | ||||
|     You can also read from an HttpRequest using a file-like interface. See | ||||
|     :meth:`HttpRequest.read()`. | ||||
|  | ||||
| .. attribute:: HttpRequest.urlconf | ||||
|  | ||||
| @@ -249,6 +255,27 @@ Methods | ||||
|    If you write your own XMLHttpRequest call (on the browser side), you'll | ||||
|    have to set this header manually if you want ``is_ajax()`` to work. | ||||
|  | ||||
| .. method:: HttpRequest.read(size=None) | ||||
| .. method:: HttpRequest.readline() | ||||
| .. method:: HttpRequest.readlines() | ||||
| .. method:: HttpRequest.xreadlines() | ||||
| .. method:: HttpRequest.__iter__() | ||||
|  | ||||
|     .. versionadded:: 1.3 | ||||
|  | ||||
|     Methods implementing a file-like interface for reading from an | ||||
|     HttpRequest instance. This makes it possible to consume an incoming | ||||
|     request in a streaming fashion. A common use-case would be to process a | ||||
|     big XML payload with an iterative parser without constructing a whole | ||||
|     XML tree in memory. | ||||
|  | ||||
|     Given this standard interface, an HttpRequest instance can be | ||||
|     passed directly to an XML parser such as ElementTree:: | ||||
|  | ||||
|         import xml.etree.ElementTree as ET | ||||
|         for element in ET.iterparse(request): | ||||
|             process(element) | ||||
|  | ||||
|  | ||||
| QueryDict objects | ||||
| ----------------- | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| from datetime import datetime, timedelta | ||||
| import time | ||||
| from StringIO import StringIO | ||||
| import unittest | ||||
|  | ||||
| from django.http import HttpRequest, HttpResponse, parse_cookie | ||||
| from django.core.handlers.wsgi import WSGIRequest | ||||
| from django.core.handlers.wsgi import WSGIRequest, LimitedStream | ||||
| from django.core.handlers.modpython import ModPythonRequest | ||||
| from django.utils.http import cookie_date | ||||
|  | ||||
| @@ -17,11 +18,11 @@ class RequestsTests(unittest.TestCase): | ||||
|         self.assertEqual(request.META.keys(), []) | ||||
|  | ||||
|     def test_wsgirequest(self): | ||||
|         request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus'}) | ||||
|         request = WSGIRequest({'PATH_INFO': 'bogus', 'REQUEST_METHOD': 'bogus', 'wsgi.input': StringIO('')}) | ||||
|         self.assertEqual(request.GET.keys(), []) | ||||
|         self.assertEqual(request.POST.keys(), []) | ||||
|         self.assertEqual(request.COOKIES.keys(), []) | ||||
|         self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME'])) | ||||
|         self.assertEqual(set(request.META.keys()), set(['PATH_INFO', 'REQUEST_METHOD', 'SCRIPT_NAME', 'wsgi.input'])) | ||||
|         self.assertEqual(request.META['PATH_INFO'], 'bogus') | ||||
|         self.assertEqual(request.META['REQUEST_METHOD'], 'bogus') | ||||
|         self.assertEqual(request.META['SCRIPT_NAME'], '') | ||||
| @@ -88,3 +89,62 @@ class RequestsTests(unittest.TestCase): | ||||
|         max_age_cookie = response.cookies['max_age'] | ||||
|         self.assertEqual(max_age_cookie['max-age'], 10) | ||||
|         self.assertEqual(max_age_cookie['expires'], cookie_date(time.time()+10)) | ||||
|  | ||||
|     def test_limited_stream(self): | ||||
|         # Read all of a limited stream | ||||
|         stream = LimitedStream(StringIO('test'), 2) | ||||
|         self.assertEqual(stream.read(), 'te') | ||||
|  | ||||
|         # Read a number of characters greater than the stream has to offer | ||||
|         stream = LimitedStream(StringIO('test'), 2) | ||||
|         self.assertEqual(stream.read(5), 'te') | ||||
|  | ||||
|         # Read sequentially from a stream | ||||
|         stream = LimitedStream(StringIO('12345678'), 8) | ||||
|         self.assertEqual(stream.read(5), '12345') | ||||
|         self.assertEqual(stream.read(5), '678') | ||||
|  | ||||
|         # Read lines from a stream | ||||
|         stream = LimitedStream(StringIO('1234\n5678\nabcd\nefgh\nijkl'), 24) | ||||
|         # Read a full line, unconditionally | ||||
|         self.assertEqual(stream.readline(), '1234\n') | ||||
|         # Read a number of characters less than a line | ||||
|         self.assertEqual(stream.readline(2), '56') | ||||
|         # Read the rest of the partial line | ||||
|         self.assertEqual(stream.readline(), '78\n') | ||||
|         # Read a full line, with a character limit greater than the line length | ||||
|         self.assertEqual(stream.readline(6), 'abcd\n') | ||||
|         # Read the next line, deliberately terminated at the line end | ||||
|         self.assertEqual(stream.readline(4), 'efgh') | ||||
|         # Read the next line... just the line end | ||||
|         self.assertEqual(stream.readline(), '\n') | ||||
|         # Read everything else. | ||||
|         self.assertEqual(stream.readline(), 'ijkl') | ||||
|  | ||||
|     def test_stream(self): | ||||
|         request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) | ||||
|         self.assertEqual(request.read(), 'name=value') | ||||
|  | ||||
|     def test_read_after_value(self): | ||||
|         """ | ||||
|         Reading from request is allowed after accessing request contents as | ||||
|         POST or raw_post_data. | ||||
|         """ | ||||
|         request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) | ||||
|         self.assertEqual(request.POST, {u'name': [u'value']}) | ||||
|         self.assertEqual(request.raw_post_data, 'name=value') | ||||
|         self.assertEqual(request.read(), 'name=value') | ||||
|  | ||||
|     def test_value_after_read(self): | ||||
|         """ | ||||
|         Construction of POST or raw_post_data is not allowed after reading | ||||
|         from request. | ||||
|         """ | ||||
|         request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) | ||||
|         self.assertEqual(request.read(2), 'na') | ||||
|         self.assertRaises(Exception, lambda: request.raw_post_data) | ||||
|         self.assertEqual(request.POST, {}) | ||||
|  | ||||
|     def test_read_by_lines(self): | ||||
|         request = WSGIRequest({'REQUEST_METHOD': 'POST', 'wsgi.input': StringIO('name=value')}) | ||||
|         self.assertEqual(list(request), ['name=value']) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user