mirror of
				https://github.com/django/django.git
				synced 2025-10-24 22:26:08 +00:00 
			
		
		
		
	Fixed #7704, #14045 and #15495 -- Introduce a lexer for Javascript to fix multiple problems of the translation of Javascript files with xgettext. Many thanks to Ned Batchelder for his contribution of the JsLex library.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@16333 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
		| @@ -9,8 +9,8 @@ from subprocess import PIPE, Popen | |||||||
|  |  | ||||||
| from django.core.management.base import CommandError, NoArgsCommand | from django.core.management.base import CommandError, NoArgsCommand | ||||||
| from django.utils.text import get_text_list | from django.utils.text import get_text_list | ||||||
|  | from django.utils.jslex import prepare_js_for_gettext | ||||||
|  |  | ||||||
| pythonize_re = re.compile(r'(?:^|\n)\s*//') |  | ||||||
| plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL) | plural_forms_re = re.compile(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL) | ||||||
|  |  | ||||||
| def handle_extensions(extensions=('html',)): | def handle_extensions(extensions=('html',)): | ||||||
| @@ -184,15 +184,15 @@ def make_messages(locale=None, domain='django', verbosity='1', all=False, | |||||||
|                 if verbosity > 1: |                 if verbosity > 1: | ||||||
|                     sys.stdout.write('processing file %s in %s\n' % (file, dirpath)) |                     sys.stdout.write('processing file %s in %s\n' % (file, dirpath)) | ||||||
|                 src = open(os.path.join(dirpath, file), "rU").read() |                 src = open(os.path.join(dirpath, file), "rU").read() | ||||||
|                 src = pythonize_re.sub('\n#', src) |                 src = prepare_js_for_gettext(src) | ||||||
|                 thefile = '%s.py' % file |                 thefile = '%s.c' % file | ||||||
|                 f = open(os.path.join(dirpath, thefile), "w") |                 f = open(os.path.join(dirpath, thefile), "w") | ||||||
|                 try: |                 try: | ||||||
|                     f.write(src) |                     f.write(src) | ||||||
|                 finally: |                 finally: | ||||||
|                     f.close() |                     f.close() | ||||||
|                 cmd = ( |                 cmd = ( | ||||||
|                     'xgettext -d %s -L Perl %s --keyword=gettext_noop ' |                     'xgettext -d %s -L C %s --keyword=gettext_noop ' | ||||||
|                     '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 ' |                     '--keyword=gettext_lazy --keyword=ngettext_lazy:1,2 ' | ||||||
|                     '--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 ' |                     '--keyword=pgettext:1c,2 --keyword=npgettext:1c,2,3 ' | ||||||
|                     '--from-code UTF-8 --add-comments=Translators -o - "%s"' % ( |                     '--from-code UTF-8 --add-comments=Translators -o - "%s"' % ( | ||||||
|   | |||||||
							
								
								
									
										213
									
								
								django/utils/jslex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										213
									
								
								django/utils/jslex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,213 @@ | |||||||
|  | """JsLex: a lexer for Javascript""" | ||||||
|  | # Originally from https://bitbucket.org/ned/jslex | ||||||
|  | import re | ||||||
|  |  | ||||||
class Tok(object):
    """
    Describe one token class: a display name plus the regex that matches it.

    Every instance receives a unique, sequential ``id`` taken from the
    class-wide counter ``Tok.num``.  ``next`` optionally names the lexer
    state to switch to once a token of this class has been matched.
    """
    # Class-wide counter; its current value is the id of the next Tok created.
    num = 0

    def __init__(self, name, regex, next=None):
        self.name, self.regex, self.next = name, regex, next
        # Claim the current counter value, then advance the shared counter.
        self.id = Tok.num
        Tok.num = self.id + 1
|  |  | ||||||
def literals(choices, prefix="", suffix=""):
    """
    Build a regex alternation out of whitespace-separated literal `choices`.

    Each choice is escaped so it matches itself verbatim; `prefix` and
    `suffix` (raw regex fragments) are wrapped around every choice
    separately, not around the alternation as a whole.
    """
    alternatives = []
    for word in choices.split():
        alternatives.append(prefix + re.escape(word) + suffix)
    return "|".join(alternatives)
|  |  | ||||||
|  |  | ||||||
class Lexer(object):
    """
    A generic multi-state regex-based lexer.

    `states` maps each state name to an ordered list of token specs (objects
    with `id`, `name`, `regex` and `next` attributes, such as `Tok`).  The
    rules of one state are joined into a single alternation, so an earlier
    rule takes precedence over a later one at the same position.  `first`
    is the name of the state the lexer starts in.
    """

    def __init__(self, states, first):
        self.regexes = {}
        self.toks = {}

        for state, rules in states.items():
            parts = []
            for tok in rules:
                # Wrap every rule in a uniquely named group so a match can be
                # traced back to the token spec that produced it.
                groupid = "t%d" % tok.id
                self.toks[groupid] = tok
                parts.append("(?P<%s>%s)" % (groupid, tok.regex))
            self.regexes[state] = re.compile("|".join(parts), re.MULTILINE | re.VERBOSE)

        self.state = first

    def lex(self, text):
        """
        Lexically analyze `text`.

        Yields pairs (`name`, `tokentext`).

        The current state is kept in `self.state` and updated as tokens with
        a `next` state are produced; it is not reset between calls.  Scanning
        stops at the first position from which no rule of the current state
        matches anything in the remaining text.

        The text is scanned in place from a moving offset instead of being
        re-sliced after every state switch, which keeps lexing linear in the
        size of the input.
        """
        pos = 0
        end = len(text)
        while pos < end:
            for match in self.regexes[self.state].finditer(text, pos):
                # The rules are top-level alternatives, so the last group to
                # close is always the named wrapper group of the winning rule.
                groupid = match.lastgroup
                tok = self.toks[groupid]
                # Track the real end of the match rather than summing token
                # lengths, so skipped characters cannot desynchronise us.
                pos = match.end()
                yield (tok.name, match.group(groupid))
                if tok.next:
                    # Restart the scan using the regex of the new state.
                    self.state = tok.next
                    break
            else:
                # The scan reached the end of the text without a state
                # switch: nothing further can match, so stop instead of
                # looping forever on unmatched input.
                return
|  |  | ||||||
|  |  | ||||||
class JsLexer(Lexer):
    """
    A Javascript lexer

    >>> lexer = JsLexer()
    >>> list(lexer.lex("a = 1"))
    [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]

    This doesn't properly handle non-Ascii characters in the Javascript source.

    The lexer has two states, 'div' and 'reg', which decide whether a slash
    is read as a division operator or as the start of a regex literal.  Each
    token's `next` state records which of the two is expected after it.
    """

    # Because these tokens are matched as alternatives in a regex, longer
    # possibilities must appear in the list before shorter ones, for example,
    # '>>' before '>'.
    #
    # Note that we don't have to detect malformed Javascript, only properly
    # lex correct Javascript, so much of this is simplified.

    # Details of Javascript lexical structure are taken from
    # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf

    # A useful explanation of automatic semicolon insertion is at
    # http://inimino.org/~inimino/blog/javascript_semicolons

    both_before = [
        Tok("comment",      r"/\*(.|\n)*?\*/"),
        Tok("linecomment",  r"//.*?$"),
        Tok("ws",           r"\s+"),
        Tok("keyword",      literals("""
                                break case catch class const continue debugger
                                default delete do else enum export extends
                                finally for function if import in instanceof
                                new return super switch this throw try typeof
                                var void while with
                                """, suffix=r"\b"), next='reg'),
        Tok("reserved",     literals("null true false", suffix=r"\b"), next='div'),
        # Unicode escapes take exactly four hex digits.  No padding is used
        # inside the character classes because whitespace stays significant
        # there even in verbose mode.
        Tok("id",           r"""
                            ([a-zA-Z_$]|\\u[0-9a-fA-F]{4})      # first char
                            ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*  # rest chars
                            """, next='div'),
        Tok("hnum",         r"0[xX][0-9a-fA-F]+", next='div'),
        # Like every other numeric literal, an octal number is an operand, so
        # a slash that follows it is a division.
        Tok("onum",         r"0[0-7]+", next='div'),
        Tok("dnum",         r"""
                            (   (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                                \.                  # dot
                                [0-9]*              # DecimalDigits-opt
                                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                            |
                                \.                  # dot
                                [0-9]+              # DecimalDigits
                                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                            |
                                (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                                ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                            )
                            """, next='div'),
        Tok("punct",        literals("""
                                >>>= === !== >>> <<= >>= <= >= == != << >> &&
                                || += -= *= %= &= |= ^=
                                """), next="reg"),
        Tok("punct",        literals("++ -- ) ]"), next='div'),
        Tok("punct",        literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
        Tok("string",       r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
        Tok("string",       r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
        ]

    # Catch-all tried after every other rule of a state: any single
    # character other than a newline.
    both_after = [
        Tok("other",        r"."),
    ]

    states = {
        'div': # slash will mean division
            both_before + [
            Tok("punct", literals("/= /"), next='reg'),
            ] + both_after,

        'reg':  # slash will mean regex
            both_before + [
            Tok("regex",
                r"""
                    /                       # opening slash
                    # First character is..
                    (   [^*\\/[]            # anything but * \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     #   anything but \ or ]
                            |   \\.         #   or an escape sequence
                            )*              #   many times
                        \]
                    )
                    # Following characters are same, except for excluding a star
                    (   [^\\/[]             # anything but \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     #   anything but \ or ]
                            |   \\.         #   or an escape sequence
                            )*              #   many times
                        \]
                    )*                      # many times
                    /                       # closing slash
                    [a-zA-Z0-9]*            # trailing flags
                """, next='div'),
            ] + both_after,
        }

    def __init__(self):
        # Start in 'reg': at the very beginning of a program a slash can
        # only open a regex literal (or a comment), never a division.
        super(JsLexer, self).__init__(self.states, 'reg')
|  |  | ||||||
|  |  | ||||||
def prepare_js_for_gettext(js):
    """
    Convert the Javascript source `js` into something resembling C for
    xgettext.

    The source is lexed with JsLexer and re-assembled token by token:

    * every regex literal is replaced with the string "REGEX";
    * single-quoted strings are rewritten as double-quoted strings, with
      any bare double quote inside them backslash-escaped;
    * backslashes in identifiers (Unicode escapes) are replaced with "U".

    All other tokens are copied through unchanged.
    """
    def requote(match):
        """Backslash-escape a bare double quote; keep anything else as is."""
        piece = match.group(0)
        return r'\"' if piece == '"' else piece

    def convert(kind, text):
        """Return the C-friendly spelling of one (kind, text) token."""
        if kind == 'regex':
            # C doesn't grok regexes, and they aren't needed for gettext,
            # so a plain string stands in for them.
            return '"REGEX"'
        if kind == 'string' and text.startswith("'"):
            # C has no single-quoted strings.  The pattern consumes escape
            # sequences as a unit so already-escaped quotes stay untouched.
            return '"' + re.sub(r"\\.|.", requote, text[1:-1]) + '"'
        if kind == 'id':
            # C can't deal with Unicode escapes in identifiers, and gettext
            # doesn't need them, so swap the backslash for something harmless.
            return text.replace("\\", "U")
        return text

    return ''.join(convert(kind, text) for kind, text in JsLexer().lex(js))
| @@ -31,11 +31,13 @@ class ExtractorTests(TestCase): | |||||||
|     def assertMsgId(self, msgid, s, use_quotes=True): |     def assertMsgId(self, msgid, s, use_quotes=True): | ||||||
|         if use_quotes: |         if use_quotes: | ||||||
|             msgid = '"%s"' % msgid |             msgid = '"%s"' % msgid | ||||||
|  |         msgid = re.escape(msgid) | ||||||
|         return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE)) |         return self.assertTrue(re.search('^msgid %s' % msgid, s, re.MULTILINE)) | ||||||
|  |  | ||||||
|     def assertNotMsgId(self, msgid, s, use_quotes=True): |     def assertNotMsgId(self, msgid, s, use_quotes=True): | ||||||
|         if use_quotes: |         if use_quotes: | ||||||
|             msgid = '"%s"' % msgid |             msgid = '"%s"' % msgid | ||||||
|  |         msgid = re.escape(msgid) | ||||||
|         return self.assertTrue(not re.search('^msgid %s' % msgid, s, re.MULTILINE)) |         return self.assertTrue(not re.search('^msgid %s' % msgid, s, re.MULTILINE)) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -73,7 +75,7 @@ class BasicExtractorTests(ExtractorTests): | |||||||
|         self.assertTrue(os.path.exists(self.PO_FILE)) |         self.assertTrue(os.path.exists(self.PO_FILE)) | ||||||
|         po_contents = open(self.PO_FILE, 'r').read() |         po_contents = open(self.PO_FILE, 'r').read() | ||||||
|         self.assertMsgId('I think that 100%% is more that 50%% of anything.', po_contents) |         self.assertMsgId('I think that 100%% is more that 50%% of anything.', po_contents) | ||||||
|         self.assertMsgId('I think that 100%% is more that 50%% of %\(obj\)s.', po_contents) |         self.assertMsgId('I think that 100%% is more that 50%% of %(obj)s.', po_contents) | ||||||
|  |  | ||||||
|     def test_extraction_error(self): |     def test_extraction_error(self): | ||||||
|         os.chdir(self.test_dir) |         os.chdir(self.test_dir) | ||||||
| @@ -102,7 +104,17 @@ class JavascriptExtractorTests(ExtractorTests): | |||||||
|         po_contents = open(self.PO_FILE, 'r').read() |         po_contents = open(self.PO_FILE, 'r').read() | ||||||
|         self.assertMsgId('This literal should be included.', po_contents) |         self.assertMsgId('This literal should be included.', po_contents) | ||||||
|         self.assertMsgId('This one as well.', po_contents) |         self.assertMsgId('This one as well.', po_contents) | ||||||
|  |         self.assertMsgId(r'He said, \"hello\".', po_contents) | ||||||
|  |         self.assertMsgId("okkkk", po_contents) | ||||||
|  |         self.assertMsgId("TEXT", po_contents) | ||||||
|  |         self.assertMsgId("It's at http://example.com", po_contents) | ||||||
|  |         self.assertMsgId("String", po_contents) | ||||||
|  |         self.assertMsgId("/* but this one will be too */ 'cause there is no way of telling...", po_contents) | ||||||
|  |         self.assertMsgId("foo", po_contents) | ||||||
|  |         self.assertMsgId("bar", po_contents) | ||||||
|  |         self.assertMsgId("baz", po_contents) | ||||||
|  |         self.assertMsgId("quz", po_contents) | ||||||
|  |         self.assertMsgId("foobar", po_contents) | ||||||
|  |  | ||||||
| class IgnoredExtractorTests(ExtractorTests): | class IgnoredExtractorTests(ExtractorTests): | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,4 +1,47 @@ | |||||||
| // ' | // ' | ||||||
| gettext('This literal should be included.') | gettext('This literal should be included.') | ||||||
| // ' | x = y; // ' | ||||||
| gettext('This one as well.') | gettext("This one as well.") | ||||||
|  |  | ||||||
|  | /** (from ticket 7704) | ||||||
|  |  * ***************************** | ||||||
|  |  * AddModule main / window | ||||||
|  |  * @constructor | ||||||
|  |  * @class MyDesktop.AddModule | ||||||
|  |  * ***************************** | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | gettext('He said, \"hello".') | ||||||
|  |  | ||||||
|  | // from ticket 14045 | ||||||
|  | function mfunc() { | ||||||
|  |     var val = 0; | ||||||
|  |     return val ? 1 : 0; | ||||||
|  | } | ||||||
|  | gettext('okkkk'); | ||||||
|  | print mysub(); | ||||||
|  |  | ||||||
|  | // from ticket 15495 | ||||||
|  | /* / ' */ gettext("TEXT"); | ||||||
|  |  | ||||||
|  | gettext("It's at http://example.com") | ||||||
|  |  | ||||||
|  | // also from ticket 15495 | ||||||
|  | gettext("String"); // This comment won't be caught by pythonize_re and it contains "'" which is a string start in Perl | ||||||
|  | /* | ||||||
|  |  * This one will be removed by the patch | ||||||
|  |  */ | ||||||
|  | gettext("/* but this one will be too */ 'cause there is no way of telling..."); | ||||||
|  | f(/* ... if it's different from this one */); | ||||||
|  |  | ||||||
|  | // from ticket 15331 | ||||||
|  | gettext("foo"); | ||||||
|  | true ? true : false; | ||||||
|  | gettext("bar"); | ||||||
|  | true ? true : false; | ||||||
|  | gettext("baz"); | ||||||
|  | true ? true : false; // ? | ||||||
|  | gettext("quz"); | ||||||
|  | "?"; | ||||||
|  | gettext("foobar"); | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										217
									
								
								tests/regressiontests/utils/jslex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								tests/regressiontests/utils/jslex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,217 @@ | |||||||
|  | """Tests for jslex.""" | ||||||
|  | # encoding: utf-8 | ||||||
|  | # originally from https://bitbucket.org/ned/jslex | ||||||
|  |  | ||||||
|  | import difflib | ||||||
|  | from django.test import TestCase | ||||||
|  | from django.utils.jslex import JsLexer, prepare_js_for_gettext | ||||||
|  |  | ||||||
class JsTokensTest(TestCase):
    """
    Token-level tests for JsLexer.

    The test methods are not written out by hand: a `test_case_N` method is
    attached to this class for every entry of LEX_CASES after the class body.
    """
    # Each entry is (javascript source, expected tokens).  Every expected
    # token is spelled "<token name> <token text>"; whitespace ('ws') tokens
    # are left out of the comparison.
    LEX_CASES = [
        # ids
        ("a ABC $ _ a123", ["id a", "id ABC", "id $", "id _", "id a123"]),
        (r"\u1234 abc\u0020 \u0065_\u0067", [r"id \u1234", r"id abc\u0020", r"id \u0065_\u0067"]),
        # numbers
        ("123 1.234 0.123e-3 0 1E+40 1e1 .123", ["dnum 123", "dnum 1.234", "dnum 0.123e-3", "dnum 0", "dnum 1E+40", "dnum 1e1", "dnum .123"]),
        ("0x1 0xabCD 0XABcd", ["hnum 0x1", "hnum 0xabCD", "hnum 0XABcd"]),
        ("010 0377 090", ["onum 010", "onum 0377", "dnum 0", "dnum 90"]),
        ("0xa123ghi", ["hnum 0xa123", "id ghi"]),
        # keywords
        ("function Function FUNCTION", ["keyword function", "id Function", "id FUNCTION"]),
        ("const constructor in inherits", ["keyword const", "id constructor", "keyword in", "id inherits"]),
        ("true true_enough", ["reserved true", "id true_enough"]),
        # strings
        (''' 'hello' "hello" ''', ["string 'hello'", 'string "hello"']),
        (r""" 'don\'t' "don\"t" '"' "'" '\'' "\"" """,
         [r"""string 'don\'t'""", r'''string "don\"t"''', r"""string '"'""", r'''string "'"''', r"""string '\''""", r'''string "\""''']),
        (ur'"ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""', [ur'string "ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""']),
        # comments
        ("a//b", ["id a", "linecomment //b"]),
        ("/****/a/=2//hello", ["comment /****/", "id a", "punct /=", "dnum 2", "linecomment //hello"]),
        ("/*\n * Header\n */\na=1;", ["comment /*\n * Header\n */", "id a", "punct =", "dnum 1", "punct ;"]),
        # punctuation
        ("a+++b", ["id a", "punct ++", "punct +", "id b"]),
        # regex
        (r"a=/a*/,1", ["id a", "punct =", "regex /a*/", "punct ,", "dnum 1"]),
        (r"a=/a*[^/]+/,1", ["id a", "punct =", "regex /a*[^/]+/", "punct ,", "dnum 1"]),
        (r"a=/a*\[^/,1", ["id a", "punct =", r"regex /a*\[^/", "punct ,", "dnum 1"]),
        (r"a=/\//,1", ["id a", "punct =", r"regex /\//", "punct ,", "dnum 1"]),

        # next two are from http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
        # (they differ only in the ':' after 'z', which changes whether the
        # following slash starts a regex or is a division)
        ("""for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}""",
            ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
            "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
            "punct :", "regex /x:3;x<5;y</g", "punct /", "id i", "punct )", "punct {",
            "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
        ("""for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}""",
            ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
            "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
            "punct /", "id x", "punct :", "dnum 3", "punct ;", "id x", "punct <", "dnum 5",
            "punct ;", "id y", "punct <", "regex /g/i", "punct )", "punct {",
            "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),

        # Various "illegal" regexes that are valid according to the std.
        (r"""/????/, /++++/, /[----]/ """, ["regex /????/", "punct ,", "regex /++++/", "punct ,", "regex /[----]/"]),

        # Stress cases from http://stackoverflow.com/questions/5533925/what-javascript-constructs-does-jslex-incorrectly-lex/5573409#5573409
        (r"""/\[/""", [r"""regex /\[/"""]),
        (r"""/[i]/""", [r"""regex /[i]/"""]),
        (r"""/[\]]/""", [r"""regex /[\]]/"""]),
        (r"""/a[\]]/""", [r"""regex /a[\]]/"""]),
        (r"""/a[\]]b/""", [r"""regex /a[\]]b/"""]),
        (r"""/[\]/]/gi""", [r"""regex /[\]/]/gi"""]),
        (r"""/\[[^\]]+\]/gi""", [r"""regex /\[[^\]]+\]/gi"""]),
        # An object literal whose values are regex literals full of quotes,
        # brackets and escaped slashes.
        ("""
            rexl.re = {
            NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
            UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
            QUOTED_LITERAL: /^'(?:[^']|'')*'/,
            NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
            SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
            };
        """,
        ["id rexl", "punct .", "id re", "punct =", "punct {",
         "id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
         "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
         "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
         "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
         "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
         "punct }", "punct ;"
         ]),

        # Same source as the previous case, followed by a string assignment.
        ("""
            rexl.re = {
            NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
            UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
            QUOTED_LITERAL: /^'(?:[^']|'')*'/,
            NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
            SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
            };
            str = '"';
        """,
        ["id rexl", "punct .", "id re", "punct =", "punct {",
         "id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
         "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
         "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
         "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
         "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
         "punct }", "punct ;",
         "id str", "punct =", """string '"'""", "punct ;",
         ]),

        # Strings with escaped quotes mixed with regex literals as arguments.
        (r""" this._js = "e.str(\"" + this.value.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + "\")"; """,
         ["keyword this", "punct .", "id _js", "punct =", r'''string "e.str(\""''', "punct +", "keyword this", "punct .",
          "id value", "punct .", "id replace", "punct (", r"regex /\\/g", "punct ,", r'string "\\\\"', "punct )",
          "punct .", "id replace", "punct (", r'regex /"/g', "punct ,", r'string "\\\""', "punct )", "punct +",
          r'string "\")"', "punct ;"]),
        ]
|  |  | ||||||
def make_function(input, toks):
    """
    Build a test method checking that lexing `input` yields `toks`.

    `toks` is a list of "<token name> <token text>" strings; whitespace
    tokens produced by the lexer are dropped before the comparison.
    """
    def test_func(self):
        observed = []
        for kind, text in JsLexer().lex(input):
            if kind == 'ws':
                continue
            observed.append("%s %s" % (kind, text))
        self.assertListEqual(observed, toks)
    return test_func
|  |  | ||||||
# Attach one generated method per LEX_CASES entry to JsTokensTest, named
# test_case_0, test_case_1, ... after the entry's position in the list.
for i, (input, toks) in enumerate(JsTokensTest.LEX_CASES):
    setattr(JsTokensTest, "test_case_%d" % i, make_function(input, toks))
|  |  | ||||||
|  |  | ||||||
# Fixtures for prepare_js_for_gettext: each entry is a pair
# (javascript source, expected output).  The two are compared as whole
# multi-line strings, so the whitespace inside the literals is significant.
GETTEXT_CASES = (
    (
        # Regex-looking text inside comments, and a real division, pass
        # through unchanged.
        r"""
            a = 1; /* /[0-9]+/ */
            b = 0x2a0b / 1; // /[0-9]+/
            c = 3;
        """,
        r"""
            a = 1; /* /[0-9]+/ */
            b = 0x2a0b / 1; // /[0-9]+/
            c = 3;
        """
    ), (
        # Decimal literals and a multi-line comment pass through unchanged.
        r"""
            a = 1.234e-5;
            /*
             * /[0-9+/
             */
            b = .0123;
        """,
        r"""
            a = 1.234e-5;
            /*
             * /[0-9+/
             */
            b = .0123;
        """
    ), (
        # Division and the '/=' operator are not mistaken for regexes.
        r"""
            x = y / z;
            alert(gettext("hello"));
            x /= 3;
        """,
        r"""
            x = y / z;
            alert(gettext("hello"));
            x /= 3;
        """
    ), (
        # Single-quoted strings become double-quoted; a bare double quote
        # inside them gets escaped, existing escapes are kept.
        r"""
            s = "Hello \"th/foo/ere\"";
            s = 'He\x23llo \'th/foo/ere\'';
            s = 'slash quote \", just quote "';
        """,
        r"""
            s = "Hello \"th/foo/ere\"";
            s = "He\x23llo \'th/foo/ere\'";
            s = "slash quote \", just quote \"";
        """
    ), (
        # A string with a line continuation is kept as is; the regex literal
        # after it is replaced.
        r"""
            s = "Line continuation\
            continued /hello/ still the string";/hello/;
        """,
        r"""
            s = "Line continuation\
            continued /hello/ still the string";"REGEX";
        """
    ), (
        # Regex literals, with or without flags, are replaced by "REGEX".
        r"""
            var regex = /pattern/;
            var regex2 = /matter/gm;
            var regex3 = /[*/]+/gm.foo("hey");
        """,
        r"""
            var regex = "REGEX";
            var regex2 = "REGEX";
            var regex3 = "REGEX".foo("hey");
        """
    ), (
        # Two lines that differ only in the ':' after 'z', which decides
        # where the regex literal is found.
        r"""
            for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}
            for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}
        """,
        r"""
            for (var x = a in foo && "</x>" || mot ? z:"REGEX"/i) {xyz(x++);}
            for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y<"REGEX") {xyz(x++);}
        """
    ), (
        # The backslash of a Unicode escape in an identifier is replaced
        # with "U".
        r"""
            \u1234xyz = gettext('Hello there');
        """, r"""
            Uu1234xyz = gettext("Hello there");
        """
    )
)
|  |  | ||||||
|  |  | ||||||
class JsToCForGettextTest(TestCase):
    """
    Tests for prepare_js_for_gettext.

    The class body is intentionally empty: test_case_N methods are attached
    after the class definition, one per GETTEXT_CASES entry.
    """
    pass
|  |  | ||||||
def make_function(js, c):
    """
    Build a test method checking that prepare_js_for_gettext turns the
    Javascript source `js` into exactly the text `c`.
    """
    def test_func(self):
        converted = prepare_js_for_gettext(js)
        self.assertMultiLineEqual(converted, c)
    return test_func
|  |  | ||||||
# Attach one generated method per GETTEXT_CASES pair to JsToCForGettextTest,
# named test_case_0, test_case_1, ... after the pair's position in the tuple.
for i, pair in enumerate(GETTEXT_CASES):
    setattr(JsToCForGettextTest, "test_case_%d" % i, make_function(*pair))
| @@ -18,3 +18,4 @@ from datastructures import * | |||||||
| from tzinfo import * | from tzinfo import * | ||||||
| from datetime_safe import * | from datetime_safe import * | ||||||
| from baseconv import * | from baseconv import * | ||||||
|  | from jslex import * | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user