mirror of
				https://github.com/django/django.git
				synced 2025-10-25 06:36:07 +00:00 
			
		
		
		
	Fixed #20197 -- Made XML serializer fail loudly when outputting unserializable chars
Thanks Tim Graham for the review.
This commit is contained in:
		| @@ -14,7 +14,9 @@ from django.conf import settings | ||||
| from django.core.serializers import base | ||||
| from django.db import DEFAULT_DB_ALIAS, models | ||||
| from django.utils.encoding import smart_text | ||||
| from django.utils.xmlutils import SimplerXMLGenerator | ||||
| from django.utils.xmlutils import ( | ||||
|     SimplerXMLGenerator, UnserializableContentError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Serializer(base.Serializer): | ||||
| @@ -78,7 +80,11 @@ class Serializer(base.Serializer): | ||||
|  | ||||
|         # Get a "string version" of the object's data. | ||||
|         if getattr(obj, field.name) is not None: | ||||
|             self.xml.characters(field.value_to_string(obj)) | ||||
|             try: | ||||
|                 self.xml.characters(field.value_to_string(obj)) | ||||
|             except UnserializableContentError: | ||||
|                 raise ValueError("%s.%s (pk:%s) contains unserializable characters" % ( | ||||
|                     obj.__class__.__name__, field.name, obj._get_pk_val())) | ||||
|         else: | ||||
|             self.xml.addQuickElement("None") | ||||
|  | ||||
|   | ||||
| @@ -2,9 +2,14 @@ | ||||
| Utilities for XML generation/parsing. | ||||
| """ | ||||
|  | ||||
| import re | ||||
| from xml.sax.saxutils import XMLGenerator | ||||
|  | ||||
|  | ||||
| class UnserializableContentError(ValueError): | ||||
|     """ | ||||
|     Raised by SimplerXMLGenerator.characters() when the content to write | ||||
|     contains control characters that are not supported in XML 1.0. | ||||
|     """ | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class SimplerXMLGenerator(XMLGenerator): | ||||
|     def addQuickElement(self, name, contents=None, attrs=None): | ||||
|         "Convenience method for adding an element with no children" | ||||
| @@ -14,3 +19,10 @@ class SimplerXMLGenerator(XMLGenerator): | ||||
|         if contents is not None: | ||||
|             self.characters(contents) | ||||
|         self.endElement(name) | ||||
|  | ||||
|     def characters(self, content): | ||||
|         """ | ||||
|         Write ``content`` as character data, refusing unsupported control | ||||
|         characters. | ||||
|  | ||||
|         Raises UnserializableContentError if ``content`` contains a character | ||||
|         in the ranges U+0000-U+0008, U+000B-U+000C or U+000E-U+001F. Tab | ||||
|         (U+0009), line feed (U+000A) and carriage return (U+000D) are outside | ||||
|         those ranges and are passed through. | ||||
|  | ||||
|         Falsy content (e.g. None or an empty string) skips the check and is | ||||
|         handed straight to XMLGenerator.characters(). | ||||
|         """ | ||||
|         if content and re.search(r'[\x00-\x08\x0B-\x0C\x0E-\x1F]', content): | ||||
|             # Fail loudly when content has control chars (unsupported in XML 1.0) | ||||
|             # See http://www.w3.org/International/questions/qa-controls | ||||
|             raise UnserializableContentError("Control characters are not supported in XML 1.0") | ||||
|         # Content passed the check (or was falsy): delegate to the base class. | ||||
|         XMLGenerator.characters(self, content) | ||||
|   | ||||
| @@ -720,6 +720,10 @@ Miscellaneous | ||||
| * Private function ``django.utils.functional.total_ordering()`` has been | ||||
|   removed. It contained a workaround for a ``functools.total_ordering()`` bug | ||||
|   in Python versions older than 2.7.3. | ||||
| * XML serialization (either through :djadmin:`dumpdata` or the syndication | ||||
|   framework) used to output any characters it received. Now, if the content to | ||||
|   be serialized contains any control characters not allowed in the XML 1.0 | ||||
|   standard, the serialization will fail with a :exc:`ValueError`. | ||||
|  | ||||
| .. _deprecated-features-1.9: | ||||
|  | ||||
|   | ||||
| @@ -213,6 +213,16 @@ the auth.User model has such a relation to the auth.Permission model:: | ||||
|  | ||||
| This example links the given user with the permission models with PKs 46 and 47. | ||||
|  | ||||
| .. admonition:: Control characters | ||||
|  | ||||
|     .. versionchanged:: 1.9 | ||||
|  | ||||
|     If the content to be serialized contains control characters that are not | ||||
|     accepted in the XML 1.0 standard, the serialization will fail with a | ||||
|     :exc:`ValueError` exception. See also the W3C's explanation of `HTML, | ||||
|     XHTML, XML and Control Codes | ||||
|     <http://www.w3.org/International/questions/qa-controls>`_. | ||||
|  | ||||
| .. _serialization-formats-json: | ||||
|  | ||||
| JSON | ||||
|   | ||||
| @@ -371,6 +371,21 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase): | ||||
|                 ret_list.append("".join(temp)) | ||||
|         return ret_list | ||||
|  | ||||
|     def test_control_char_failure(self): | ||||
|         """ | ||||
|         Serializing control characters with XML should fail as those characters | ||||
|         are not supported in the XML 1.0 standard (except HT, LF, CR). | ||||
|         """ | ||||
|         # U+0001 and U+0011 are control characters other than HT, LF and CR, | ||||
|         # so serialization is expected to be refused. | ||||
|         self.a1.headline = "This contains \u0001 control \u0011 chars" | ||||
|         # The expected ValueError message names the model class, the field | ||||
|         # and the primary key of the offending object. | ||||
|         msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk | ||||
|         with self.assertRaisesMessage(ValueError, msg): | ||||
|             serializers.serialize(self.serializer_name, [self.a1]) | ||||
|         # Tab, line feed and carriage return must still serialize, and must | ||||
|         # appear unchanged in the output. | ||||
|         self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed" | ||||
|         self.assertIn( | ||||
|             "HT \t, LF \n, and CR \r are allowed", | ||||
|             serializers.serialize(self.serializer_name, [self.a1]) | ||||
|         ) | ||||
|  | ||||
|  | ||||
| class XmlSerializerTransactionTestCase(SerializersTransactionTestBase, TransactionTestCase): | ||||
|     serializer_name = "xml" | ||||
|   | ||||
		Reference in New Issue
	
	Block a user