mirror of
				https://github.com/django/django.git
				synced 2025-10-24 22:26:08 +00:00 
			
		
		
		
	Fixed #2628 -- Added django.contrib.sitemap. Thanks for the patch, Dan Watson
git-svn-id: http://code.djangoproject.com/svn/django/trunk@3694 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
		
							
								
								
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -139,6 +139,7 @@ answer newbie questions, and generally made Django that much better: | ||||
|     Amit Upadhyay | ||||
|     Geert Vanderkelen | ||||
|     Milton Waddams | ||||
|     Dan Watson <http://theidioteque.net/> | ||||
|     Rachel Willmer <http://www.willmer.com/kb/> | ||||
|     wojtek | ||||
|     ye7cakf02@sneakemail.com | ||||
|   | ||||
							
								
								
									
										90
									
								
								django/contrib/sitemap/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								django/contrib/sitemap/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
| from django.core import urlresolvers | ||||
| import urllib | ||||
|  | ||||
| PING_URL = "http://www.google.com/webmasters/sitemaps/ping" | ||||
|  | ||||
class SitemapNotFound(Exception):
    """Raised by ping_google() when no sitemap URL is given or can be deduced."""
|  | ||||
def ping_google(sitemap_url=None, ping_url=PING_URL):
    """
    Alerts Google that the sitemap for the current site has been updated.

    If sitemap_url is provided, it should be an absolute path to the sitemap
    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    function will attempt to deduce it by using urlresolvers.reverse(), trying
    the "index" sitemap view first and the "global" sitemap view second.

    ping_url is the address that receives the notification; it defaults to
    PING_URL.

    Raises SitemapNotFound if sitemap_url was not provided and neither view
    could be reversed.
    """
    if sitemap_url is None:
        # Prefer the sitemap index; fall back to the single global sitemap.
        for view_name in ('django.contrib.sitemap.views.index',
                          'django.contrib.sitemap.views.sitemap'):
            try:
                sitemap_url = urlresolvers.reverse(view_name)
            except urlresolvers.NoReverseMatch:
                continue
            break

    if sitemap_url is None:
        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")

    # Imported here rather than at module level, as elsewhere in this module.
    from django.contrib.sites.models import Site
    current_site = Site.objects.get_current()
    # The site domain is combined with a scheme everywhere else it is used to
    # build a URL, so add one here too in order to send a complete URL.
    url = "http://%s%s" % (current_site.domain, sitemap_url)
    params = urllib.urlencode({'sitemap': url})
    urllib.urlopen("%s?%s" % (ping_url, params))
|  | ||||
class Sitemap:
    """
    Base class for one "section" of a sitemap.

    Subclasses supply items() and may define location, lastmod, changefreq
    and priority, each either as a plain attribute or as a callable that
    takes a single item.
    """

    def __get(self, name, obj, default=None):
        """
        Look up the attribute `name` on this sitemap for the item `obj`.

        Returns `default` when the attribute does not exist, the result of
        calling it with `obj` when it is callable, and the attribute itself
        otherwise.
        """
        missing = object()  # private sentinel: None may be a legitimate value
        value = getattr(self, name, missing)
        if value is missing:
            return default
        if not callable(value):
            return value
        return value(obj)

    def items(self):
        """Return the objects to list in the sitemap; empty by default."""
        return []

    def location(self, obj):
        """Return the URL path for `obj`, taken from its get_absolute_url()."""
        return obj.get_absolute_url()

    def get_urls(self):
        """
        Return one dict per item with the keys 'location', 'lastmod',
        'changefreq' and 'priority'. The location is the item's path prefixed
        with "http://" and the current site's domain.
        """
        from django.contrib.sites.models import Site
        domain = Site.objects.get_current().domain
        return [
            {
                'location':   "http://%s%s" % (domain, self.__get('location', entry)),
                'lastmod':    self.__get('lastmod', entry, None),
                'changefreq': self.__get('changefreq', entry, None),
                'priority':   self.__get('priority', entry, None)
            }
            for entry in self.items()
        ]
|  | ||||
class FlatpageSitemap(Sitemap):
    """Sitemap section listing the flatpages attached to the current site."""

    def items(self):
        """Return everything in the current site's flatpage_set."""
        from django.contrib.sites.models import Site
        return Site.objects.get_current().flatpage_set.all()
|  | ||||
class GenericSitemap(Sitemap):
    """
    Sitemap section driven by an info_dict.

    info_dict must contain a 'queryset' entry and may contain a 'date_field'
    entry naming the attribute used for each item's lastmod value.
    """
    # Class-level defaults; __init__ sets both again on every instance.
    priority = None
    changefreq = None

    def __init__(self, info_dict, priority=None, changefreq=None):
        """Store the queryset, the optional date field and the fixed
        priority/changefreq applied to every item."""
        self.queryset = info_dict['queryset']
        self.date_field = info_dict.get('date_field')
        self.priority, self.changefreq = priority, changefreq

    def items(self):
        """Return the items for this section."""
        # Make sure to return a clone; we don't want premature evaluation.
        return self.queryset.filter()

    def lastmod(self, item):
        """Return the item's date_field value, or None if no field was given."""
        if self.date_field is None:
            return None
        return getattr(item, self.date_field)
							
								
								
									
										11
									
								
								django/contrib/sitemap/templates/sitemap.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								django/contrib/sitemap/templates/sitemap.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <urlset xmlns="http://www.google.com/schemas/sitemap/0.84"> | ||||
| {% for url in urlset %} | ||||
|   <url> | ||||
|     <loc>{{ url.location|escape }}</loc> | ||||
|     {% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %} | ||||
|     {% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %} | ||||
|     {% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %} | ||||
|    </url> | ||||
| {% endfor %} | ||||
| </urlset> | ||||
							
								
								
									
										8
									
								
								django/contrib/sitemap/templates/sitemap_index.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								django/contrib/sitemap/templates/sitemap_index.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <sitemapindex xmlns="http://www.google.com/schemas/sitemap/0.84"> | ||||
| {% for location in sitemaps %} | ||||
| 	<sitemap> | ||||
| 		<loc>{{ location|escape }}</loc> | ||||
| 	</sitemap> | ||||
| {% endfor %} | ||||
| </sitemapindex> | ||||
							
								
								
									
										30
									
								
								django/contrib/sitemap/views.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								django/contrib/sitemap/views.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| from django.http import HttpResponse, Http404 | ||||
| from django.template import loader | ||||
| from django.contrib.sites.models import Site | ||||
| from django.core import urlresolvers | ||||
|  | ||||
def index(request, sitemaps):
    """
    Render sitemap_index.xml with one absolute URL per section in `sitemaps`.

    Each URL is the reversed per-section sitemap view, prefixed with the
    current site's domain and with https or http depending on whether the
    request is secure.
    """
    domain = Site.objects.get_current().domain
    if request.is_secure():
        scheme = 'https'
    else:
        scheme = 'http'
    locations = [
        '%s://%s%s' % (
            scheme,
            domain,
            urlresolvers.reverse('django.contrib.sitemap.views.sitemap', kwargs={'section': name}),
        )
        for name in sitemaps.keys()
    ]
    body = loader.render_to_string('sitemap_index.xml', {'sitemaps': locations})
    return HttpResponse(body, mimetype='application/xml')
|  | ||||
def sitemap(request, sitemaps, section=None):
    """
    Render sitemap.xml for a single section, or for every section.

    `sitemaps` maps section labels to Sitemap classes or instances. When
    `section` is None the URLs of all sections are combined; otherwise only
    the named section is rendered.

    Raises Http404 if `section` is given but is not a key of `sitemaps`.
    """
    if section is None:
        maps = sitemaps.values()
    else:
        # The "in" operator replaces the deprecated dict.has_key().
        if section not in sitemaps:
            raise Http404("No sitemap available for section: %r" % section)
        maps = [sitemaps[section]]

    urls = []
    for site in maps:
        # Callable entries are instantiated with no arguments; anything else
        # is used as it is.
        if callable(site):
            site = site()
        urls.extend(site.get_urls())
    xml = loader.render_to_string('sitemap.xml', {'urlset': urls})
    return HttpResponse(xml, mimetype='application/xml')
							
								
								
									
										318
									
								
								docs/sitemaps.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										318
									
								
								docs/sitemaps.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,318 @@ | ||||
| ===================== | ||||
| The sitemap framework | ||||
| ===================== | ||||
|  | ||||
| Django comes with a high-level sitemap-generating framework that makes | ||||
| creating `Google Sitemap`_ XML files easy. | ||||
|  | ||||
| .. _Google Sitemap: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html | ||||
|  | ||||
| Overview | ||||
| ======== | ||||
|  | ||||
| A sitemap is an XML file on your Web site that tells search-engine indexers how | ||||
| frequently your pages change and how "important" certain pages are in relation | ||||
| to other pages on your site. This information helps search engines index your | ||||
| site. | ||||
|  | ||||
| The Django sitemap framework automates the creation of this XML file by letting | ||||
| you express this information in Python code. | ||||
|  | ||||
| It works much like Django's `syndication framework`_. To create a sitemap, just | ||||
| write a ``Sitemap`` class and point to it in your URLconf_. | ||||
|  | ||||
| .. _syndication framework: http://www.djangoproject.com/documentation/syndication/ | ||||
| .. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/ | ||||
|  | ||||
| Installation | ||||
| ============ | ||||
|  | ||||
| To install the sitemap app, follow these steps: | ||||
|  | ||||
|     1. Add ``'django.contrib.sitemap'`` to your INSTALLED_APPS_ setting. | ||||
|     2. Make sure ``'django.template.loaders.app_directories.load_template_source'`` | ||||
|        is in your TEMPLATE_LOADERS_ setting. It's in there by default, so | ||||
|        you'll only need to change this if you've changed that setting. | ||||
|     3. Make sure you've installed the `sites framework`_. | ||||
|  | ||||
| (Note: The sitemap application doesn't install any database tables. The only | ||||
| reason it needs to go into ``INSTALLED_APPS`` is so that the | ||||
| ``load_template_source`` template loader can find the default templates.) | ||||
|  | ||||
| .. _INSTALLED_APPS: http://www.djangoproject.com/documentation/settings/#installed-apps | ||||
| .. _TEMPLATE_LOADERS: http://www.djangoproject.com/documentation/settings/#template-loaders | ||||
| .. _sites framework: http://www.djangoproject.com/documentation/sites/ | ||||
|  | ||||
| Initialization | ||||
| ============== | ||||
|  | ||||
| To activate sitemap generation on your Django site, add this line to your | ||||
| URLconf_:: | ||||
|  | ||||
|     (r'^sitemap.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps}) | ||||
|  | ||||
| This tells Django to build a sitemap when a client accesses ``/sitemap.xml``. | ||||
|  | ||||
| The name of the sitemap file is not important, but the location is. Google will | ||||
| only index links in your sitemap for the current URL level and below. For | ||||
| instance, if ``sitemap.xml`` lives in your root directory, it may reference any | ||||
| URL in your site. However, if your sitemap lives at ``/content/sitemap.xml``, | ||||
| it may only reference URLs that begin with ``/content/``. | ||||
|  | ||||
| The sitemap view takes an extra, required argument: ``{'sitemaps': sitemaps}``. | ||||
| ``sitemaps`` should be a dictionary that maps a short section label (e.g., | ||||
| ``blog`` or ``news``) to its ``Sitemap`` class (e.g., ``BlogSitemap`` or | ||||
| ``NewsSitemap``). It may also map to an *instance* of a ``Sitemap`` class | ||||
| (e.g., ``BlogSitemap(some_var)``). | ||||
|  | ||||
| .. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/ | ||||
|  | ||||
| Sitemap classes | ||||
| =============== | ||||
|  | ||||
| A ``Sitemap`` class is a simple Python class that represents a "section" of | ||||
| entries in your sitemap. For example, one ``Sitemap`` class could represent all | ||||
| the entries of your weblog, while another could represent all of the events in | ||||
| your events calendar. | ||||
|  | ||||
| In the simplest case, all these sections get lumped together into one | ||||
| ``sitemap.xml``, but it's also possible to use the framework to generate a | ||||
| sitemap index that references individual sitemap files, one per section. (See | ||||
| `Creating a sitemap index`_ below.) | ||||
|  | ||||
| ``Sitemap`` classes must subclass ``django.contrib.sitemap.Sitemap``. They can | ||||
| live anywhere in your codebase. | ||||
|  | ||||
| A simple example | ||||
| ================ | ||||
|  | ||||
| Let's assume you have a blog system, with an ``Entry`` model, and you want your | ||||
| sitemap to include all the links to your individual blog entries. Here's how | ||||
| your sitemap class might look:: | ||||
|  | ||||
|     from django.contrib.sitemap import Sitemap | ||||
|     from mysite.blog.models import Entry | ||||
|  | ||||
|     class BlogSitemap(Sitemap): | ||||
|         changefreq = "never" | ||||
|         priority = 0.5 | ||||
|  | ||||
|         def items(self): | ||||
|             return Entry.objects.filter(is_draft=False) | ||||
|  | ||||
|         def lastmod(self, obj): | ||||
|             return obj.pub_date | ||||
|  | ||||
| Note: | ||||
|  | ||||
|     * ``changefreq`` and ``priority`` are class attributes corresponding to | ||||
|       ``<changefreq>`` and ``<priority>`` elements, respectively. They can be | ||||
|       made callable as functions, as ``lastmod`` was in the example. | ||||
|     * ``items()`` is simply a method that returns a list of objects. The objects | ||||
|       returned will get passed to any callable methods corresponding to a | ||||
|       sitemap property (``location``, ``lastmod``, ``changefreq``, and | ||||
|       ``priority``). | ||||
|     * ``lastmod`` should return a Python ``datetime`` object. | ||||
|     * There is no ``location`` method in this example, but you can provide it | ||||
|       in order to specify the URL for your object. By default, ``location()`` | ||||
|       calls ``get_absolute_url()`` on each object and returns the result. | ||||
|  | ||||
| Sitemap class reference | ||||
| ======================= | ||||
|  | ||||
| A ``Sitemap`` class can define the following methods/attributes: | ||||
|  | ||||
| ``items`` | ||||
| --------- | ||||
|  | ||||
| **Required.** A method that returns a list of objects. The framework doesn't | ||||
| care what *type* of objects they are; all that matters is that these objects | ||||
| get passed to the ``location()``, ``lastmod()``, ``changefreq()`` and | ||||
| ``priority()`` methods. | ||||
|  | ||||
| ``location`` | ||||
| ------------ | ||||
|  | ||||
| **Optional.** Either a method or attribute. | ||||
|  | ||||
| If it's a method, it should return the absolute URL for a given object as | ||||
| returned by ``items()``. | ||||
|  | ||||
| If it's an attribute, its value should be a string representing an absolute URL | ||||
| to use for *every* object returned by ``items()``. | ||||
|  | ||||
| In both cases, "absolute URL" means a URL that doesn't include the protocol or | ||||
| domain. Examples: | ||||
|  | ||||
|     * Good: ``'/foo/bar/'`` | ||||
|     * Bad: ``'example.com/foo/bar/'`` | ||||
|     * Bad: ``'http://example.com/foo/bar/'`` | ||||
|  | ||||
| If ``location`` isn't provided, the framework will call the | ||||
| ``get_absolute_url()`` method on each object as returned by ``items()``. | ||||
|  | ||||
| ``lastmod`` | ||||
| ----------- | ||||
|  | ||||
| **Optional.** Either a method or attribute. | ||||
|  | ||||
| If it's a method, it should take one argument -- an object as returned by | ||||
| ``items()`` -- and return that object's last-modified date/time, as a Python | ||||
| ``datetime.datetime`` object. | ||||
|  | ||||
| If it's an attribute, its value should be a Python ``datetime.datetime`` object | ||||
| representing the last-modified date/time for *every* object returned by | ||||
| ``items()``. | ||||
|  | ||||
| ``changefreq`` | ||||
| -------------- | ||||
|  | ||||
| **Optional.** Either a method or attribute. | ||||
|  | ||||
| If it's a method, it should take one argument -- an object as returned by | ||||
| ``items()`` -- and return that object's change frequency, as a Python string. | ||||
|  | ||||
| If it's an attribute, its value should be a string representing the change | ||||
| frequency of *every* object returned by ``items()``. | ||||
|  | ||||
| Possible values for ``changefreq``, whether you use a method or attribute, are: | ||||
|  | ||||
|     * ``'always'`` | ||||
|     * ``'hourly'`` | ||||
|     * ``'daily'`` | ||||
|     * ``'weekly'`` | ||||
|     * ``'monthly'`` | ||||
|     * ``'yearly'`` | ||||
|     * ``'never'`` | ||||
|  | ||||
| ``priority`` | ||||
| ------------ | ||||
|  | ||||
| **Optional.** Either a method or attribute. | ||||
|  | ||||
| If it's a method, it should take one argument -- an object as returned by | ||||
| ``items()`` -- and return that object's priority, as either a string or float. | ||||
|  | ||||
| If it's an attribute, its value should be either a string or float representing | ||||
| the priority of *every* object returned by ``items()``. | ||||
|  | ||||
| Example values for ``priority``: ``0.4``, ``1.0``. The default priority of a | ||||
| page is ``0.5``. See `Google's documentation`_ for more information. | ||||
|  | ||||
| .. _Google's documentation: http://www.google.com/webmasters/sitemaps/docs/en/protocol.html | ||||
|  | ||||
| Shortcuts | ||||
| ========= | ||||
|  | ||||
| The sitemap framework provides a couple of convenience classes for common cases: | ||||
|  | ||||
| ``FlatpageSitemap`` | ||||
| ------------------- | ||||
|  | ||||
| The ``FlatpageSitemap`` class looks at all flatpages_ defined for the current | ||||
| ``SITE_ID`` (see the `sites documentation`_) and creates an entry in the | ||||
| sitemap. These entries include only the ``location`` attribute -- not | ||||
| ``lastmod``, ``changefreq`` or ``priority``. | ||||
|  | ||||
| .. _flatpages: http://www.djangoproject.com/documentation/flatpages/ | ||||
| .. _sites documentation: http://www.djangoproject.com/documentation/sites/ | ||||
|  | ||||
| ``GenericSitemap`` | ||||
| ------------------ | ||||
|  | ||||
| The ``GenericSitemap`` class works with any `generic views`_ you already have. | ||||
| To use it, create an instance, passing in the same ``info_dict`` you pass to | ||||
| the generic views. The only requirement is that the dictionary have a | ||||
| ``queryset`` entry. It may also have a ``date_field`` entry that specifies a | ||||
| date field for objects retrieved from the ``queryset``. This will be used for | ||||
| the ``lastmod`` attribute in the generated sitemap. You may also pass | ||||
| ``priority`` and ``changefreq`` keyword arguments to the ``GenericSitemap`` | ||||
| constructor to specify these attributes for all URLs. | ||||
|  | ||||
| .. _generic views: http://www.djangoproject.com/documentation/generic_views/ | ||||
|  | ||||
| Example | ||||
| ------- | ||||
|  | ||||
| Here's an example of a URLconf_ using both:: | ||||
|  | ||||
|     from django.conf.urls.defaults import * | ||||
|     from django.contrib.sitemap import FlatpageSitemap, GenericSitemap | ||||
|     from mysite.blog.models import Entry | ||||
|  | ||||
|     info_dict = { | ||||
|         'queryset': Entry.objects.all(), | ||||
|         'date_field': 'pub_date', | ||||
|     } | ||||
|  | ||||
|     sitemaps = { | ||||
|         'flatpages': FlatpageSitemap, | ||||
|         'blog': GenericSitemap(info_dict, priority=0.6), | ||||
|     } | ||||
|  | ||||
|     urlpatterns = patterns('', | ||||
|         # some generic view using info_dict | ||||
|         # ... | ||||
|  | ||||
|         # the sitemap | ||||
|         (r'^sitemap.xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps}) | ||||
|     ) | ||||
|  | ||||
| .. _URLconf: http://www.djangoproject.com/documentation/url_dispatch/ | ||||
|  | ||||
| Creating a sitemap index | ||||
| ======================== | ||||
|  | ||||
| The sitemap framework also has the ability to create a sitemap index that | ||||
| references individual sitemap files, one for each section defined in your | ||||
| ``sitemaps`` dictionary. The only differences in usage are: | ||||
|  | ||||
|     * You use two views in your URLconf: ``django.contrib.sitemap.views.index`` | ||||
|       and ``django.contrib.sitemap.views.sitemap``. | ||||
|     * The ``django.contrib.sitemap.views.sitemap`` view should take a | ||||
|       ``section`` keyword argument. | ||||
|  | ||||
| Here is what the relevant URLconf lines would look like for the example above:: | ||||
|  | ||||
|     (r'^sitemap.xml$', 'django.contrib.sitemap.views.index', {'sitemaps': sitemaps}) | ||||
|     (r'^sitemap-(?P<section>.+).xml$', 'django.contrib.sitemap.views.sitemap', {'sitemaps': sitemaps}) | ||||
|  | ||||
| This will automatically generate a ``sitemap.xml`` file that references | ||||
| both ``sitemap-flatpages.xml`` and ``sitemap-blog.xml``. The ``Sitemap`` | ||||
| classes and the ``sitemaps`` dict don't change at all. | ||||
|  | ||||
| Pinging Google | ||||
| ============== | ||||
|  | ||||
| You may want to "ping" Google when your sitemap changes, to let it know to | ||||
| reindex your site. The framework provides a function to do just that: | ||||
| ``django.contrib.sitemap.ping_google()``. | ||||
|  | ||||
| ``ping_google()`` takes an optional argument, ``sitemap_url``, which should be | ||||
| the absolute URL of your site's sitemap (e.g., ``'/sitemap.xml'``). If this | ||||
| argument isn't provided, ``ping_google()`` will attempt to figure out your | ||||
| sitemap by performing a reverse lookup in your URLconf. | ||||
|  | ||||
| ``ping_google()`` raises the exception | ||||
| ``django.contrib.sitemap.SitemapNotFound`` if it cannot determine your sitemap | ||||
| URL. | ||||
|  | ||||
| One useful way to call ``ping_google()`` is from a model's ``save()`` method:: | ||||
|  | ||||
|     from django.contrib.sitemap import ping_google | ||||
|  | ||||
|     class Entry(models.Model): | ||||
|         # ... | ||||
|         def save(self): | ||||
|             super(Entry, self).save() | ||||
|             try: | ||||
|                 ping_google() | ||||
|             except Exception: | ||||
|                 # Catch the broad 'Exception' because we could get a | ||||
|                 # variety of HTTP-related exceptions. | ||||
|                 pass | ||||
|  | ||||
| A more efficient solution, however, would be to call ``ping_google()`` from a | ||||
| cron script, or some other scheduled task. The function makes an HTTP request | ||||
| to Google's servers, so you may not want to introduce that network overhead | ||||
| each time you call ``save()``. | ||||
		Reference in New Issue
	
	Block a user