mirror of
				https://github.com/django/django.git
				synced 2025-10-25 14:46:09 +00:00 
			
		
		
		
	[1.5.x] Fixed #19237 -- Improved strip_tags utility
The previous pattern didn't properly address cases where '>'
was present inside quoted tag content.
Backport of bf1871d87 from master.
			
			
This commit is contained in:
		| @@ -33,6 +33,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') | |||||||
| html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) | html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) | ||||||
| hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) | hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) | ||||||
| trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z') | trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z') | ||||||
|  | strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE) | ||||||
|  |  | ||||||
|  |  | ||||||
| def escape(text): | def escape(text): | ||||||
| @@ -117,7 +118,7 @@ linebreaks = allow_lazy(linebreaks, six.text_type) | |||||||
|  |  | ||||||
| def strip_tags(value): | def strip_tags(value): | ||||||
|     """Returns the given HTML with all tags stripped.""" |     """Returns the given HTML with all tags stripped.""" | ||||||
|     return re.sub(r'<[^>]*?>', '', force_text(value)) |     return strip_tags_re.sub('', force_text(value)) | ||||||
| strip_tags = allow_lazy(strip_tags) | strip_tags = allow_lazy(strip_tags) | ||||||
|  |  | ||||||
| def remove_tags(html, tags): | def remove_tags(html, tags): | ||||||
|   | |||||||
| @@ -65,6 +65,9 @@ class TestUtilsHtml(unittest.TestCase): | |||||||
|             ('<f', '<f'), |             ('<f', '<f'), | ||||||
|             ('</fe', '</fe'), |             ('</fe', '</fe'), | ||||||
|             ('<x>b<y>', 'b'), |             ('<x>b<y>', 'b'), | ||||||
|  |             ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'), | ||||||
|  |             ('a<p a >b</p>c', 'abc'), | ||||||
|  |             ('d<a:b c:d>e</p>f', 'def'), | ||||||
|         ) |         ) | ||||||
|         for value, output in items: |         for value, output in items: | ||||||
|             self.check_output(f, value, output) |             self.check_output(f, value, output) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user