1
0
mirror of https://github.com/django/django.git synced 2025-10-26 15:16:09 +00:00

queryset-refactor: Converted the queryset iterator to be a real iterator and
only populate the result cache on demand. We actually populate the result cache
100 elements at a time, rather than one at a time, for efficiency, but this is a
real win when the result set contains 10,000 objects, for example.

This also provides an efficient boolean (__nonzero__) test that doesn't use up
a lot of memory if you don't read all the results.

Refs #2430, #5987.


git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick
2008-01-26 13:23:54 +00:00
parent 98abf27535
commit dd2251a653
2 changed files with 85 additions and 7 deletions

View File

@@ -21,6 +21,7 @@ except NameError:
# Used to control how many objects are worked with at once in some cases (e.g. # Used to control how many objects are worked with at once in some cases (e.g.
# when deleting objects). # when deleting objects).
CHUNK_SIZE = 100 CHUNK_SIZE = 100
ITER_CHUNK_SIZE = CHUNK_SIZE
class _QuerySet(object): class _QuerySet(object):
"Represents a lazy database lookup for a set of objects" "Represents a lazy database lookup for a set of objects"
@@ -28,19 +29,40 @@ class _QuerySet(object):
self.model = model self.model = model
self.query = query or sql.Query(self.model, connection) self.query = query or sql.Query(self.model, connection)
self._result_cache = None self._result_cache = None
self._iter = None
######################## ########################
# PYTHON MAGIC METHODS # # PYTHON MAGIC METHODS #
######################## ########################
def __repr__(self): def __repr__(self):
return repr(self._get_data()) return repr(list(iter(self)))
def __len__(self): def __len__(self):
return len(self._get_data()) return len(list(iter(self)))
def __iter__(self): def __iter__(self):
return iter(self._get_data()) pos = 0
if self._result_cache is None:
self._iter = self.iterator()
self._result_cache = []
while 1:
upper = len(self._result_cache)
while pos < upper:
yield self._result_cache[pos]
pos = pos + 1
if not self._iter:
raise StopIteration
if len(self._result_cache) <= pos:
self._fill_cache()
def __nonzero__(self):
if self._result_cache is None:
try:
iter(self).next()
except StopIteration:
return False
return True
def __getitem__(self, k): def __getitem__(self, k):
"Retrieve an item or slice from the set of results." "Retrieve an item or slice from the set of results."
@@ -52,6 +74,15 @@ class _QuerySet(object):
"Negative indexing is not supported." "Negative indexing is not supported."
if self._result_cache is not None: if self._result_cache is not None:
if self._iter is not None:
# The result cache has only been partially populated, so we may
# need to fill it out a bit more.
if isinstance(k, slice):
bound = k.stop
else:
bound = k + 1
if len(self._result_cache) < bound:
self._fill_cache(bound - len(self._result_cache))
return self._result_cache[k] return self._result_cache[k]
if isinstance(k, slice): if isinstance(k, slice):
@@ -375,10 +406,17 @@ class _QuerySet(object):
c._setup_query() c._setup_query()
return c return c
def _get_data(self): def _fill_cache(self, num=None):
if self._result_cache is None: """
self._result_cache = list(self.iterator()) Fills the result cache with 'num' more entries (or until the results
return self._result_cache iterator is exhausted).
"""
if self._iter:
try:
for i in range(num or ITER_CHUNK_SIZE):
self._result_cache.append(self._iter.next())
except StopIteration:
self._iter = None
# Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet. # Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet.
if connection.features.uses_custom_queryset: if connection.features.uses_custom_queryset:
@@ -395,6 +433,9 @@ class ValuesQuerySet(QuerySet):
# QuerySet.clone() will also set up the _fields attribute with the # QuerySet.clone() will also set up the _fields attribute with the
# names of the model fields to select. # names of the model fields to select.
def __iter__(self):
return self.iterator()
def iterator(self): def iterator(self):
self.field_names.extend([f for f in self.query.extra_select.keys()]) self.field_names.extend([f for f in self.query.extra_select.keys()])
for row in self.query.results_iter(): for row in self.query.results_iter():

View File

@@ -501,5 +501,42 @@ Bug #6203
2 2
>>> len(Item.objects.dates('created', 'day')) >>> len(Item.objects.dates('created', 'day'))
2 2
Test that parallel iterators work.
>>> qs = Tag.objects.all()
>>> i1, i2 = iter(qs), iter(qs)
>>> i1.next(), i1.next()
(<Tag: t1>, <Tag: t2>)
>>> i2.next(), i2.next(), i2.next()
(<Tag: t1>, <Tag: t2>, <Tag: t3>)
>>> i1.next()
<Tag: t3>
We can do slicing beyond what is currently in the result cache, too.
# We need to mess with the implementation internals a bit here to decrease the
# cache fill size so that we don't read all the results at once.
>>> from django.db.models import query
>>> query.ITER_CHUNK_SIZE = 2
>>> qs = Tag.objects.all()
# Fill the cache with the first chunk.
>>> bool(qs)
True
>>> len(qs._result_cache)
2
# Query beyond the end of the cache and check that it is filled out as required.
>>> qs[4]
<Tag: t5>
>>> len(qs._result_cache)
5
# But querying beyond the end of the result set will fail.
>>> qs[100]
Traceback (most recent call last):
...
IndexError: ...
"""} """}