From 171df93170df23d2fea1f8320ddb80c6f6444ff7 Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Thu, 5 May 2011 20:49:26 +0000 Subject: [PATCH] Fixed #15954 - New IGNORABLE_404_URLS setting that allows more powerful filtering of 404s to ignore Thanks to aaugustin for implementing this. (Technically this doesn't fix the original report, as we've decided against having *any* default values, but the new feature makes it possible, and the docs have an example addressing #15954). git-svn-id: http://code.djangoproject.com/svn/django/trunk@16160 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/conf/global_settings.py | 14 +++++-- django/middleware/common.py | 24 ++++++++---- docs/howto/error-reporting.txt | 29 +++++++++++++-- docs/internals/deprecation.txt | 4 ++ docs/ref/settings.txt | 45 +++++++++++++++-------- docs/releases/1.4.txt | 38 +++++++++++++++++++ tests/regressiontests/middleware/tests.py | 45 +++++++++++++++++++++-- 7 files changed, 165 insertions(+), 34 deletions(-) diff --git a/django/conf/global_settings.py b/django/conf/global_settings.py index 1ab9b498a9..477f13b675 100644 --- a/django/conf/global_settings.py +++ b/django/conf/global_settings.py @@ -246,9 +246,17 @@ ALLOWED_INCLUDE_ROOTS = () # is an admin. ADMIN_FOR = () -# 404s that may be ignored. -IGNORABLE_404_STARTS = ('/cgi-bin/', '/_vti_bin', '/_vti_inf') -IGNORABLE_404_ENDS = ('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', 'favicon.ico', '.php') +# List of compiled regular expression objects representing URLs that need not +# be reported when SEND_BROKEN_LINK_EMAILS is True. Here are a few examples: +# import re +# IGNORABLE_404_URLS = ( +# re.compile(r'^/apple-touch-icon.*\.png$'), +# re.compile(r'^/favicon.ico$'), +# re.compile(r'^/robots.txt$'), +# re.compile(r'^/phpmyadmin/'), +# re.compile(r'\.(cgi|php|pl)$'), +# ) +IGNORABLE_404_URLS = () # A secret key for this particular Django installation. Used in secret-key # hashing algorithms. 
Set this in your settings, or Django will complain diff --git a/django/middleware/common.py b/django/middleware/common.py index eb145edf19..689929bea4 100644 --- a/django/middleware/common.py +++ b/django/middleware/common.py @@ -127,13 +127,23 @@ def _is_ignorable_404(uri): """ Returns True if a 404 at the given URL *shouldn't* notify the site managers. """ - for start in settings.IGNORABLE_404_STARTS: - if uri.startswith(start): - return True - for end in settings.IGNORABLE_404_ENDS: - if uri.endswith(end): - return True - return False + if getattr(settings, 'IGNORABLE_404_STARTS', ()): + import warnings + warnings.warn('The IGNORABLE_404_STARTS setting has been deprecated ' + 'in favour of IGNORABLE_404_URLS.', + PendingDeprecationWarning) + for start in settings.IGNORABLE_404_STARTS: + if uri.startswith(start): + return True + if getattr(settings, 'IGNORABLE_404_ENDS', ()): + import warnings + warnings.warn('The IGNORABLE_404_ENDS setting has been deprecated ' + 'in favour of IGNORABLE_404_URLS.', + PendingDeprecationWarning) + for end in settings.IGNORABLE_404_ENDS: + if uri.endswith(end): + return True + return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS) def _is_internal_request(domain, referer): """ diff --git a/docs/howto/error-reporting.txt b/docs/howto/error-reporting.txt index 063f943894..ddef93873e 100644 --- a/docs/howto/error-reporting.txt +++ b/docs/howto/error-reporting.txt @@ -66,15 +66,29 @@ a referer. (It doesn't bother to email for 404s that don't have a referer -- those are usually just people typing in broken URLs or broken Web 'bots). You can tell Django to stop reporting particular 404s by tweaking the -:setting:`IGNORABLE_404_ENDS` and :setting:`IGNORABLE_404_STARTS` settings. Both -should be a tuple of strings. For example:: +:setting:`IGNORABLE_404_URLS` setting. It should be a tuple of compiled +regular expression objects. 
For example:: - IGNORABLE_404_ENDS = ('.php', '.cgi') - IGNORABLE_404_STARTS = ('/phpmyadmin/',) + import re + IGNORABLE_404_URLS = ( + re.compile(r'\.(php|cgi)$'), + re.compile(r'^/phpmyadmin/'), + ) In this example, a 404 to any URL ending with ``.php`` or ``.cgi`` will *not* be reported. Neither will any URL starting with ``/phpmyadmin/``. +The following example shows how to exclude some conventional URLs that browsers and +crawlers often request:: + + import re + IGNORABLE_404_URLS = ( + re.compile(r'^/apple-touch-icon.*\.png$'), + re.compile(r'^/favicon.ico$'), + re.compile(r'^/robots.txt$'), + ) + + The best way to disable this behavior is to set :setting:`SEND_BROKEN_LINK_EMAILS` to ``False``. @@ -93,3 +107,10 @@ The best way to disable this behavior is to set records are ignored, but you can use them for error reporting by writing a handler and :doc:`configuring logging </topics/logging>` appropriately. +.. seealso:: + + .. versionchanged:: 1.4 + + Previously, two settings were used to control which URLs not to report: + :setting:`IGNORABLE_404_STARTS` and :setting:`IGNORABLE_404_ENDS`. They + were replaced by :setting:`IGNORABLE_404_URLS`. diff --git a/docs/internals/deprecation.txt b/docs/internals/deprecation.txt index 98b2138bc1..8ebeb668e9 100644 --- a/docs/internals/deprecation.txt +++ b/docs/internals/deprecation.txt @@ -199,6 +199,10 @@ their deprecation, as per the :ref:`Django deprecation policy ISO 3166 code for United Kingdom). They have been depreacted since the 1.4 release. + * The :setting:`IGNORABLE_404_STARTS` and :setting:`IGNORABLE_404_ENDS` + settings have been superseded by :setting:`IGNORABLE_404_URLS` in + the 1.4 release. They will be removed. + * 2.0 * ``django.views.defaults.shortcut()``. 
This function has been moved to ``django.contrib.contenttypes.views.shortcut()`` as part of the diff --git a/docs/ref/settings.txt b/docs/ref/settings.txt index 69892c7267..3a28745598 100644 --- a/docs/ref/settings.txt +++ b/docs/ref/settings.txt @@ -1020,25 +1020,23 @@ Available formats are ``DATE_FORMAT``, ``TIME_FORMAT``, ``DATETIME_FORMAT``, ``SHORT_DATETIME_FORMAT``, ``FIRST_DAY_OF_WEEK``, ``DECIMAL_SEPARATOR``, ``THOUSAND_SEPARATOR`` and ``NUMBER_GROUPING``. -.. setting:: IGNORABLE_404_ENDS +.. setting:: IGNORABLE_404_URLS -IGNORABLE_404_ENDS +IGNORABLE_404_URLS ------------------ -Default: ``('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', 'favicon.ico', '.php')`` +.. versionadded:: 1.4 -See also ``IGNORABLE_404_STARTS`` and ``Error reporting via email``. +Default: ``()`` -.. setting:: IGNORABLE_404_STARTS +List of compiled regular expression objects describing URLs that should be +ignored when reporting HTTP 404 errors via email (see +:doc:`/howto/error-reporting`). Use this if your site does not provide a +commonly requested file such as ``favicon.ico`` or ``robots.txt``, or if it +gets hammered by script kiddies. -IGNORABLE_404_STARTS --------------------- - -Default: ``('/cgi-bin/', '/_vti_bin', '/_vti_inf')`` - -A tuple of strings that specify beginnings of URLs that should be ignored by -the 404 emailer. See ``SEND_BROKEN_LINK_EMAILS``, ``IGNORABLE_404_ENDS`` and -the :doc:`/howto/error-reporting`. +This is only used if :setting:`SEND_BROKEN_LINK_EMAILS` is set to ``True`` and +``CommonMiddleware`` is installed (see :doc:`/topics/http/middleware`). .. setting:: INSTALLED_APPS @@ -1435,8 +1433,8 @@ Default: ``False`` Whether to send an email to the ``MANAGERS`` each time somebody visits a Django-powered page that is 404ed with a non-empty referer (i.e., a broken link). This is only used if ``CommonMiddleware`` is installed (see -:doc:`/topics/http/middleware`. 
See also ``IGNORABLE_404_STARTS``, -``IGNORABLE_404_ENDS`` and :doc:`/howto/error-reporting`. +:doc:`/topics/http/middleware`). See also ``IGNORABLE_404_URLS`` and +:doc:`/howto/error-reporting`. .. setting:: SERIALIZATION_MODULES @@ -2045,6 +2043,22 @@ DATABASE_USER This setting has been replaced by :setting:`USER` in :setting:`DATABASES`. +.. setting:: IGNORABLE_404_ENDS + +IGNORABLE_404_ENDS +------------------ + +.. deprecated:: 1.4 + This setting has been superseded by :setting:`IGNORABLE_404_URLS`. + +.. setting:: IGNORABLE_404_STARTS + +IGNORABLE_404_STARTS +-------------------- + +.. deprecated:: 1.4 + This setting has been superseded by :setting:`IGNORABLE_404_URLS`. + .. setting:: TEST_DATABASE_CHARSET TEST_DATABASE_CHARSET @@ -2071,4 +2085,3 @@ TEST_DATABASE_NAME .. deprecated:: 1.2 This setting has been replaced by :setting:`TEST_NAME` in :setting:`DATABASES`. - diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt index 3e82253116..1b8510f461 100644 --- a/docs/releases/1.4.txt +++ b/docs/releases/1.4.txt @@ -176,3 +176,41 @@ Save this model manager in your custom comment app (e.g. in For more details see the docs about :doc:`customizing the comments framework </ref/contrib/comments/custom>`. + +`IGNORABLE_404_STARTS` and `IGNORABLE_404_ENDS` settings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Django can report 404 errors: see :doc:`/howto/error-reporting`. +Until Django 1.3, it was possible to exclude some URLs from the reporting +by adding prefixes to :setting:`IGNORABLE_404_STARTS` and suffixes to +:setting:`IGNORABLE_404_ENDS`. + +In Django 1.4, these two settings are superseded by +:setting:`IGNORABLE_404_URLS`, which is a list of compiled regular expressions. +Django won't send an email for 404 errors on URLs that match any of them. 
+ +Furthermore, the previous settings had some rather arbitrary default values:: + + IGNORABLE_404_STARTS = ('/cgi-bin/', '/_vti_bin', '/_vti_inf') + IGNORABLE_404_ENDS = ('mail.pl', 'mailform.pl', 'mail.cgi', 'mailform.cgi', + 'favicon.ico', '.php') + +It's not Django's role to decide if your website has a legacy ``/cgi-bin/`` +section or a ``favicon.ico``. As a consequence, the default values of +:setting:`IGNORABLE_404_URLS`, :setting:`IGNORABLE_404_STARTS` and +:setting:`IGNORABLE_404_ENDS` are all now empty. + +If you have customized :setting:`IGNORABLE_404_STARTS` or +:setting:`IGNORABLE_404_ENDS`, or if you want to keep the old default value, +you should add the following lines in your settings file:: + + import re + IGNORABLE_404_URLS = ( + # for each <prefix> in IGNORABLE_404_STARTS + re.compile(r'^<prefix>'), + # for each <suffix> in IGNORABLE_404_ENDS + re.compile(r'<suffix>$'), + ) + +Don't forget to escape characters that have a special meaning in a regular +expression. diff --git a/tests/regressiontests/middleware/tests.py b/tests/regressiontests/middleware/tests.py index c364cb436e..c069228487 100644 --- a/tests/regressiontests/middleware/tests.py +++ b/tests/regressiontests/middleware/tests.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- +import re + from django.conf import settings +from django.core import mail from django.http import HttpRequest from django.middleware.common import CommonMiddleware from django.middleware.http import ConditionalGetMiddleware @@ -9,12 +12,16 @@ from django.test import TestCase class CommonMiddlewareTest(TestCase): def setUp(self): - self.slash = settings.APPEND_SLASH - self.www = settings.PREPEND_WWW + self.append_slash = settings.APPEND_SLASH + self.prepend_www = settings.PREPEND_WWW + self.ignorable_404_urls = settings.IGNORABLE_404_URLS + self.send_broken_email_links = settings.SEND_BROKEN_LINK_EMAILS def tearDown(self): - settings.APPEND_SLASH = self.slash - settings.PREPEND_WWW = self.www + settings.APPEND_SLASH = self.append_slash + 
settings.PREPEND_WWW = self.prepend_www + settings.IGNORABLE_404_URLS = self.ignorable_404_urls + settings.SEND_BROKEN_LINK_EMAILS = self.send_broken_email_links def _get_request(self, path): request = HttpRequest() @@ -249,6 +256,36 @@ class CommonMiddlewareTest(TestCase): self.assertEqual(r['Location'], 'http://www.testserver/middleware/customurlconf/slash/') + # Tests for the 404 error reporting via email + + def test_404_error_reporting(self): + settings.IGNORABLE_404_URLS = (re.compile(r'foo'),) + settings.SEND_BROKEN_LINK_EMAILS = True + request = self._get_request('regular_url/that/does/not/exist') + request.META['HTTP_REFERER'] = '/another/url/' + response = self.client.get(request.path) + CommonMiddleware().process_response(request, response) + self.assertEqual(len(mail.outbox), 1) + self.assertIn('Broken', mail.outbox[0].subject) + + def test_404_error_reporting_no_referer(self): + settings.IGNORABLE_404_URLS = (re.compile(r'foo'),) + settings.SEND_BROKEN_LINK_EMAILS = True + request = self._get_request('regular_url/that/does/not/exist') + response = self.client.get(request.path) + CommonMiddleware().process_response(request, response) + self.assertEqual(len(mail.outbox), 0) + + def test_404_error_reporting_ignored_url(self): + settings.IGNORABLE_404_URLS = (re.compile(r'foo'),) + settings.SEND_BROKEN_LINK_EMAILS = True + request = self._get_request('foo_url/that/does/not/exist/either') + request.META['HTTP_REFERER'] = '/another/url/' + response = self.client.get(request.path) + CommonMiddleware().process_response(request, response) + self.assertEqual(len(mail.outbox), 0) + + class ConditionalGetMiddlewareTest(TestCase): urls = 'regressiontests.middleware.cond_get_urls' def setUp(self):