1
0
mirror of https://github.com/django/django.git synced 2025-10-25 14:46:09 +00:00

Fixed #2070: refactored Django's file upload capabilities.

A description of the new features can be found in the new [http://www.djangoproject.com/documentation/upload_handing/ upload handling documentation]; the executive summary is that Django will now happily handle uploads of large files without issues.

This changes the representation of uploaded files from dictionaries to bona fide objects; see BackwardsIncompatibleChanges for details.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@7814 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Jacob Kaplan-Moss
2008-07-01 15:10:51 +00:00
parent ef76102e89
commit d725cc9734
38 changed files with 2290 additions and 153 deletions

View File

View File

@@ -0,0 +1,66 @@
"""
Portable file locking utilities.
Based partially on example by Jonathan Feignberg <jdf@pobox.com> in the Python
Cookbook, licensed under the Python Software License.
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65203
Example Usage::
>>> from django.core.files import locks
>>> f = open('./file', 'wb')
>>> locks.lock(f, locks.LOCK_EX)
>>> f.write('Django')
>>> f.close()
"""
__all__ = ('LOCK_EX','LOCK_SH','LOCK_NB','lock','unlock')
system_type = None
try:
import win32con
import win32file
import pywintypes
LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK
LOCK_SH = 0
LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY
__overlapped = pywintypes.OVERLAPPED()
system_type = 'nt'
except (ImportError, AttributeError):
pass
try:
import fcntl
LOCK_EX = fcntl.LOCK_EX
LOCK_SH = fcntl.LOCK_SH
LOCK_NB = fcntl.LOCK_NB
system_type = 'posix'
except (ImportError, AttributeError):
pass
if system_type == 'nt':
def lock(file, flags):
hfile = win32file._get_osfhandle(file.fileno())
win32file.LockFileEx(hfile, flags, 0, -0x10000, __overlapped)
def unlock(file):
hfile = win32file._get_osfhandle(file.fileno())
win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
elif system_type == 'posix':
def lock(file, flags):
fcntl.flock(file.fileno(), flags)
def unlock(file):
fcntl.flock(file.fileno(), fcntl.LOCK_UN)
else:
# File locking is not supported.
LOCK_EX = LOCK_SH = LOCK_NB = None
# Dummy functions that don't do anything.
def lock(file, flags):
pass
def unlock(file):
pass

59
django/core/files/move.py Normal file
View File

@@ -0,0 +1,59 @@
"""
Move a file in the safest way possible::
>>> from django.core.files.move import file_move_save
>>> file_move_save("/tmp/old_file", "/tmp/new_file")
"""
import os
from django.core.files import locks
__all__ = ['file_move_safe']
try:
import shutil
file_move = shutil.move
except ImportError:
file_move = os.rename
def file_move_safe(old_file_name, new_file_name, chunk_size = 1024*64, allow_overwrite=False):
"""
Moves a file from one location to another in the safest way possible.
First, try using ``shutils.move``, which is OS-dependent but doesn't break
if moving across filesystems. Then, try ``os.rename``, which will break
across filesystems. Finally, streams manually from one file to another in
pure Python.
If the destination file exists and ``allow_overwrite`` is ``False``, this
function will throw an ``IOError``.
"""
# There's no reason to move if we don't have to.
if old_file_name == new_file_name:
return
if not allow_overwrite and os.path.exists(new_file_name):
raise IOError("Cannot overwrite existing file '%s'." % new_file_name)
try:
file_move(old_file_name, new_file_name)
return
except OSError:
# This will happen with os.rename if moving to another filesystem
pass
# If the built-in didn't work, do it the hard way.
new_file = open(new_file_name, 'wb')
locks.lock(new_file, locks.LOCK_EX)
old_file = open(old_file_name, 'rb')
current_chunk = None
while current_chunk != '':
current_chunk = old_file.read(chunk_size)
new_file.write(current_chunk)
new_file.close()
old_file.close()
os.remove(old_file_name)

View File

@@ -0,0 +1,190 @@
"""
Classes representing uploaded files.
"""
import os
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
__all__ = ('UploadedFile', 'TemporaryUploadedFile', 'InMemoryUploadedFile')
class UploadedFile(object):
"""
A abstract uploadded file (``TemporaryUploadedFile`` and
``InMemoryUploadedFile`` are the built-in concrete subclasses).
An ``UploadedFile`` object behaves somewhat like a file object and
represents some file data that the user submitted with a form.
"""
DEFAULT_CHUNK_SIZE = 64 * 2**10
def __init__(self, file_name=None, content_type=None, file_size=None, charset=None):
self.file_name = file_name
self.file_size = file_size
self.content_type = content_type
self.charset = charset
def __repr__(self):
return "<%s: %s (%s)>" % (self.__class__.__name__, self.file_name, self.content_type)
def _set_file_name(self, name):
# Sanitize the file name so that it can't be dangerous.
if name is not None:
# Just use the basename of the file -- anything else is dangerous.
name = os.path.basename(name)
# File names longer than 255 characters can cause problems on older OSes.
if len(name) > 255:
name, ext = os.path.splitext(name)
name = name[:255 - len(ext)] + ext
self._file_name = name
def _get_file_name(self):
return self._file_name
file_name = property(_get_file_name, _set_file_name)
def chunk(self, chunk_size=None):
"""
Read the file and yield chucks of ``chunk_size`` bytes (defaults to
``UploadedFile.DEFAULT_CHUNK_SIZE``).
"""
if not chunk_size:
chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
if hasattr(self, 'seek'):
self.seek(0)
# Assume the pointer is at zero...
counter = self.file_size
while counter > 0:
yield self.read(chunk_size)
counter -= chunk_size
def multiple_chunks(self, chunk_size=None):
"""
Returns ``True`` if you can expect multiple chunks.
NB: If a particular file representation is in memory, subclasses should
always return ``False`` -- there's no good reason to read from memory in
chunks.
"""
if not chunk_size:
chunk_size = UploadedFile.DEFAULT_CHUNK_SIZE
return self.file_size < chunk_size
# Abstract methods; subclasses *must* default read() and probably should
# define open/close.
def read(self, num_bytes=None):
raise NotImplementedError()
def open(self):
pass
def close(self):
pass
# Backwards-compatible support for uploaded-files-as-dictionaries.
def __getitem__(self, key):
import warnings
warnings.warn(
message = "The dictionary access of uploaded file objects is deprecated. Use the new object interface instead.",
category = DeprecationWarning,
stacklevel = 2
)
backwards_translate = {
'filename': 'file_name',
'content-type': 'content_type',
}
if key == 'content':
return self.read()
elif key == 'filename':
return self.file_name
elif key == 'content-type':
return self.content_type
else:
return getattr(self, key)
class TemporaryUploadedFile(UploadedFile):
"""
A file uploaded to a temporary location (i.e. stream-to-disk).
"""
def __init__(self, file, file_name, content_type, file_size, charset):
super(TemporaryUploadedFile, self).__init__(file_name, content_type, file_size, charset)
self.file = file
self.path = file.name
self.file.seek(0)
def temporary_file_path(self):
"""
Returns the full path of this file.
"""
return self.path
def read(self, *args, **kwargs):
return self.file.read(*args, **kwargs)
def open(self):
self.seek(0)
def seek(self, *args, **kwargs):
self.file.seek(*args, **kwargs)
class InMemoryUploadedFile(UploadedFile):
"""
A file uploaded into memory (i.e. stream-to-memory).
"""
def __init__(self, file, field_name, file_name, content_type, charset, file_size):
super(InMemoryUploadedFile, self).__init__(file_name, content_type, charset, file_size)
self.file = file
self.field_name = field_name
self.file.seek(0)
def seek(self, *args, **kwargs):
self.file.seek(*args, **kwargs)
def open(self):
self.seek(0)
def read(self, *args, **kwargs):
return self.file.read(*args, **kwargs)
def chunk(self, chunk_size=None):
self.file.seek(0)
yield self.read()
def multiple_chunks(self, chunk_size=None):
# Since it's in memory, we'll never have multiple chunks.
return False
class SimpleUploadedFile(InMemoryUploadedFile):
"""
A simple representation of a file, which just has content, size, and a name.
"""
def __init__(self, name, content, content_type='text/plain'):
self.file = StringIO(content or '')
self.file_name = name
self.field_name = None
self.file_size = len(content or '')
self.content_type = content_type
self.charset = None
self.file.seek(0)
def from_dict(cls, file_dict):
"""
Creates a SimpleUploadedFile object from
a dictionary object with the following keys:
- filename
- content-type
- content
"""
return cls(file_dict['filename'],
file_dict['content'],
file_dict.get('content-type', 'text/plain'))
from_dict = classmethod(from_dict)

View File

@@ -0,0 +1,235 @@
"""
Base file upload handler classes, and the built-in concrete subclasses
"""
import os
import tempfile
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.core.files.uploadedfile import TemporaryUploadedFile, InMemoryUploadedFile
__all__ = ['UploadFileException','StopUpload', 'SkipFile', 'FileUploadHandler',
'TemporaryFileUploadHandler', 'MemoryFileUploadHandler',
'load_handler']
class UploadFileException(Exception):
"""
Any error having to do with uploading files.
"""
pass
class StopUpload(UploadFileException):
"""
This exception is raised when an upload must abort.
"""
def __init__(self, connection_reset=False):
"""
If ``connection_reset`` is ``True``, Django knows will halt the upload
without consuming the rest of the upload. This will cause the browser to
show a "connection reset" error.
"""
self.connection_reset = connection_reset
def __unicode__(self):
if self.connection_reset:
return u'StopUpload: Halt current upload.'
else:
return u'StopUpload: Consume request data, then halt.'
class SkipFile(UploadFileException):
"""
This exception is raised by an upload handler that wants to skip a given file.
"""
pass
class StopFutureHandlers(UploadFileException):
"""
Upload handers that have handled a file and do not want future handlers to
run should raise this exception instead of returning None.
"""
pass
class FileUploadHandler(object):
"""
Base class for streaming upload handlers.
"""
chunk_size = 64 * 2 ** 10 #: The default chunk size is 64 KB.
def __init__(self, request=None):
self.file_name = None
self.content_type = None
self.content_length = None
self.charset = None
self.request = request
def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
"""
Handle the raw input from the client.
Parameters:
:input_data:
An object that supports reading via .read().
:META:
``request.META``.
:content_length:
The (integer) value of the Content-Length header from the
client.
:boundary: The boundary from the Content-Type header. Be sure to
prepend two '--'.
"""
pass
def new_file(self, field_name, file_name, content_type, content_length, charset=None):
"""
Signal that a new file has been started.
Warning: As with any data from the client, you should not trust
content_length (and sometimes won't even get it).
"""
self.field_name = field_name
self.file_name = file_name
self.content_type = content_type
self.content_length = content_length
self.charset = charset
def receive_data_chunk(self, raw_data, start):
"""
Receive data from the streamed upload parser. ``start`` is the position
in the file of the chunk.
"""
raise NotImplementedError()
def file_complete(self, file_size):
"""
Signal that a file has completed. File size corresponds to the actual
size accumulated by all the chunks.
Subclasses must should return a valid ``UploadedFile`` object.
"""
raise NotImplementedError()
def upload_complete(self):
"""
Signal that the upload is complete. Subclasses should perform cleanup
that is necessary for this handler.
"""
pass
class TemporaryFileUploadHandler(FileUploadHandler):
"""
Upload handler that streams data into a temporary file.
"""
def __init__(self, *args, **kwargs):
super(TemporaryFileUploadHandler, self).__init__(*args, **kwargs)
def new_file(self, file_name, *args, **kwargs):
"""
Create the file object to append to as data is coming in.
"""
super(TemporaryFileUploadHandler, self).new_file(file_name, *args, **kwargs)
self.file = TemporaryFile(settings.FILE_UPLOAD_TEMP_DIR)
self.write = self.file.write
def receive_data_chunk(self, raw_data, start):
self.write(raw_data)
def file_complete(self, file_size):
self.file.seek(0)
return TemporaryUploadedFile(self.file, self.file_name,
self.content_type, file_size,
self.charset)
class MemoryFileUploadHandler(FileUploadHandler):
"""
File upload handler to stream uploads into memory (used for small files).
"""
def handle_raw_input(self, input_data, META, content_length, boundary, encoding=None):
"""
Use the content_length to signal whether or not this handler should be in use.
"""
# Check the content-length header to see if we should
# If the the post is too large, we cannot use the Memory handler.
if content_length > settings.FILE_UPLOAD_MAX_MEMORY_SIZE:
self.activated = False
else:
self.activated = True
def new_file(self, *args, **kwargs):
super(MemoryFileUploadHandler, self).new_file(*args, **kwargs)
if self.activated:
self.file = StringIO()
raise StopFutureHandlers()
def receive_data_chunk(self, raw_data, start):
"""
Add the data to the StringIO file.
"""
if self.activated:
self.file.write(raw_data)
else:
return raw_data
def file_complete(self, file_size):
"""
Return a file object if we're activated.
"""
if not self.activated:
return
return InMemoryUploadedFile(self.file, self.field_name, self.file_name,
self.content_type, self.charset, file_size)
class TemporaryFile(object):
"""
A temporary file that tries to delete itself when garbage collected.
"""
def __init__(self, dir):
if not dir:
dir = tempfile.gettempdir()
try:
(fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir)
self.file = os.fdopen(fd, 'w+b')
except (OSError, IOError):
raise OSError("Could not create temporary file for uploading, have you set settings.FILE_UPLOAD_TEMP_DIR correctly?")
self.name = name
def __getattr__(self, name):
a = getattr(self.__dict__['file'], name)
if type(a) != type(0):
setattr(self, name, a)
return a
def __del__(self):
try:
os.unlink(self.name)
except OSError:
pass
def load_handler(path, *args, **kwargs):
"""
Given a path to a handler, return an instance of that handler.
E.g.::
>>> load_handler('django.core.files.uploadhandler.TemporaryFileUploadHandler', request)
<TemporaryFileUploadHandler object at 0x...>
"""
i = path.rfind('.')
module, attr = path[:i], path[i+1:]
try:
mod = __import__(module, {}, {}, [attr])
except ImportError, e:
raise ImproperlyConfigured('Error importing upload handler module %s: "%s"' % (module, e))
except ValueError, e:
raise ImproperlyConfigured('Error importing upload handler module. Is FILE_UPLOAD_HANDLERS a correctly defined list or tuple?')
try:
cls = getattr(mod, attr)
except AttributeError:
raise ImproperlyConfigured('Module "%s" does not define a "%s" upload handler backend' % (module, attr))
return cls(*args, **kwargs)

View File

@@ -53,7 +53,8 @@ class ModPythonRequest(http.HttpRequest):
def _load_post_and_files(self):
"Populates self._post and self._files"
if 'content-type' in self._req.headers_in and self._req.headers_in['content-type'].startswith('multipart'):
self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data)
self._raw_post_data = ''
self._post, self._files = self.parse_file_upload(self.META, self._req)
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()

View File

@@ -112,9 +112,8 @@ class WSGIRequest(http.HttpRequest):
# Populates self._post and self._files
if self.method == 'POST':
if self.environ.get('CONTENT_TYPE', '').startswith('multipart'):
header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')])
header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '')
self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data)
self._raw_post_data = ''
self._post, self._files = self.parse_file_upload(self.META, self.environ['wsgi.input'])
else:
self._post, self._files = http.QueryDict(self.raw_post_data, encoding=self._encoding), datastructures.MultiValueDict()
else: