From e75882332c8593a1d5a47f9f1e099e9201b12296 Mon Sep 17 00:00:00 2001 From: Sergey Kolosov Date: Tue, 25 Aug 2015 02:28:23 +0300 Subject: [PATCH] Fixed #17375 -- Changed makemessages to use xgettext with --files-from Changed the way makemessages invokes xgettext from one call per translatable file to one call per locale directory (using --files-from). This allows to avoid https://savannah.gnu.org/bugs/index.php?35027 and, as a positive side effect, speeds up localization build. --- .../core/management/commands/makemessages.py | 301 ++++++++++++------ docs/releases/1.9.txt | 3 + tests/i18n/commands/templates/plural.djtpl | 2 + tests/i18n/commands/templates/test.html | 6 + tests/i18n/test_extraction.py | 18 ++ 5 files changed, 226 insertions(+), 104 deletions(-) diff --git a/django/core/management/commands/makemessages.py b/django/core/management/commands/makemessages.py index 37e1b8e0da..866e325816 100644 --- a/django/core/management/commands/makemessages.py +++ b/django/core/management/commands/makemessages.py @@ -11,6 +11,7 @@ from itertools import dropwhile import django from django.conf import settings +from django.core.files.temp import NamedTemporaryFile from django.core.management.base import BaseCommand, CommandError from django.core.management.utils import ( find_command, handle_extensions, popen_wrapper, @@ -74,103 +75,96 @@ class TranslatableFile(object): def path(self): return os.path.join(self.dirpath, self.file) - def process(self, command, domain): - """ - Extract translatable literals from self.file for :param domain:, - creating or updating the POT file. - Uses the xgettext GNU gettext utility. +class BuildFile(object): + """ + Represents the state of a translatable file during the build process. + """ + def __init__(self, command, domain, translatable): + self.command = command + self.domain = domain + self.translatable = translatable + + @cached_property + def is_templatized(self): + if self.domain == 'djangojs': + return self.command.gettext_version < (0, 18, 3) + elif self.domain == 'django': + file_ext = os.path.splitext(self.translatable.file)[1] + return file_ext != '.py' + return False + + @cached_property + def path(self): + return self.translatable.path + + @cached_property + def work_path(self): + """ + Path to a file which is being fed into GNU gettext pipeline. This may + be either a translatable or its preprocessed version. + """ + if not self.is_templatized: + return self.path + extension = { + 'djangojs': 'c', + 'django': 'py', + }.get(self.domain) + filename = '%s.%s' % (self.translatable.file, extension) + return os.path.join(self.translatable.dirpath, filename) + + def preprocess(self): + """ + Preprocess (if necessary) a translatable file before passing it to + xgettext GNU gettext utility. """ from django.utils.translation import templatize - if command.verbosity > 1: - command.stdout.write('processing file %s in %s\n' % (self.file, self.dirpath)) - file_ext = os.path.splitext(self.file)[1] - if domain == 'djangojs': - orig_file = os.path.join(self.dirpath, self.file) - work_file = orig_file - is_templatized = command.gettext_version < (0, 18, 3) - if is_templatized: - with io.open(orig_file, 'r', encoding=settings.FILE_CHARSET) as fp: - src_data = fp.read() - src_data = prepare_js_for_gettext(src_data) - work_file = os.path.join(self.dirpath, '%s.c' % self.file) - with io.open(work_file, "w", encoding='utf-8') as fp: - fp.write(src_data) - args = [ - 'xgettext', - '-d', domain, - '--language=%s' % ('C' if is_templatized else 'JavaScript',), - '--keyword=gettext_noop', - '--keyword=gettext_lazy', - '--keyword=ngettext_lazy:1,2', - '--keyword=pgettext:1c,2', - '--keyword=npgettext:1c,2,3', - '--output=-' - ] + command.xgettext_options - args.append(work_file) - elif domain == 'django': - orig_file = os.path.join(self.dirpath, self.file) - work_file = orig_file - is_templatized = file_ext != '.py' - if is_templatized: - with io.open(orig_file, encoding=settings.FILE_CHARSET) as fp: - src_data = fp.read() - content = templatize(src_data, orig_file[2:]) - work_file = os.path.join(self.dirpath, '%s.py' % self.file) - with io.open(work_file, "w", encoding='utf-8') as fp: - fp.write(content) - args = [ - 'xgettext', - '-d', domain, - '--language=Python', - '--keyword=gettext_noop', - '--keyword=gettext_lazy', - '--keyword=ngettext_lazy:1,2', - '--keyword=ugettext_noop', - '--keyword=ugettext_lazy', - '--keyword=ungettext_lazy:1,2', - '--keyword=pgettext:1c,2', - '--keyword=npgettext:1c,2,3', - '--keyword=pgettext_lazy:1c,2', - '--keyword=npgettext_lazy:1c,2,3', - '--output=-' - ] + command.xgettext_options - args.append(work_file) - else: + if not self.is_templatized: return - msgs, errors, status = gettext_popen_wrapper(args) - if errors: - if status != STATUS_OK: - if is_templatized: - os.unlink(work_file) - raise CommandError( - "errors happened while running xgettext on %s\n%s" % - (self.file, errors)) - elif command.verbosity > 0: - # Print warnings - command.stdout.write(errors) - if msgs: - # Write/append messages to pot file - if self.locale_dir is NO_LOCALE_DIR: - file_path = os.path.normpath(os.path.join(self.dirpath, self.file)) - raise CommandError( - "Unable to find a locale path to store translations for file %s" % file_path) - potfile = os.path.join(self.locale_dir, '%s.pot' % str(domain)) - if is_templatized: - # Remove '.py' suffix - if os.name == 'nt': - # Preserve '.\' prefix on Windows to respect gettext behavior - old = '#: ' + work_file - new = '#: ' + orig_file - else: - old = '#: ' + work_file[2:] - new = '#: ' + orig_file[2:] - msgs = msgs.replace(old, new) - write_pot_file(potfile, msgs) - if is_templatized: - os.unlink(work_file) + with io.open(self.path, 'r', encoding=settings.FILE_CHARSET) as fp: + src_data = fp.read() + + if self.domain == 'djangojs': + content = prepare_js_for_gettext(src_data) + elif self.domain == 'django': + content = templatize(src_data, self.path[2:]) + + with io.open(self.work_path, 'w', encoding='utf-8') as fp: + fp.write(content) + + def postprocess_messages(self, msgs): + """ + Postprocess messages generated by xgettext GNU gettext utility. + + Transform paths as if these messages were generated from original + translatable files rather than from preprocessed versions. + """ + if not self.is_templatized: + return msgs + + # Remove '.py' suffix + if os.name == 'nt': + # Preserve '.\' prefix on Windows to respect gettext behavior + old = '#: ' + self.work_path + new = '#: ' + self.path + else: + old = '#: ' + self.work_path[2:] + new = '#: ' + self.path[2:] + + return msgs.replace(old, new) + + def cleanup(self): + """ + Remove a preprocessed copy of a translatable file (if any). + """ + if self.is_templatized: + # This check is needed for the case of a symlinked file and its + # source being processed inside a single group (locale dir); + # removing either of those two removes both. + if os.path.exists(self.work_path): + os.unlink(self.work_path) def write_pot_file(potfile, msgs): @@ -194,6 +188,9 @@ class Command(BaseCommand): "applications) directory.\n\nYou must run this command with one of either the " "--locale, --exclude or --all options.") + translatable_file_class = TranslatableFile + build_file_class = BuildFile + requires_system_checks = False leave_locale_alone = True @@ -353,18 +350,7 @@ class Command(BaseCommand): """ file_list = self.find_files(".") self.remove_potfiles() - for f in file_list: - try: - f.process(self, self.domain) - except UnicodeDecodeError as e: - self.stdout.write( - "UnicodeDecodeError: skipped file %s in %s (reason: %s)" % ( - f.file, - f.dirpath, - e, - ) - ) - + self.process_files(file_list) potfiles = [] for path in self.locale_paths: potfile = os.path.join(path, '%s.pot' % str(self.domain)) @@ -443,9 +429,116 @@ class Command(BaseCommand): locale_dir = self.default_locale_path if not locale_dir: locale_dir = NO_LOCALE_DIR - all_files.append(TranslatableFile(dirpath, filename, locale_dir)) + all_files.append(self.translatable_file_class(dirpath, filename, locale_dir)) return sorted(all_files) + def process_files(self, file_list): + """ + Group translatable files by locale directory and run pot file build + process for each group. + """ + file_groups = {} + for translatable in file_list: + file_group = file_groups.setdefault(translatable.locale_dir, []) + file_group.append(translatable) + for locale_dir, files in file_groups.items(): + self.process_locale_dir(locale_dir, files) + + def process_locale_dir(self, locale_dir, files): + """ + Extract translatable literals from the specified files, creating or + updating the POT file for a given locale directory. + + Uses the xgettext GNU gettext utility. + """ + build_files = [] + for translatable in files: + if self.verbosity > 1: + self.stdout.write('processing file %s in %s\n' % ( + translatable.file, translatable.dirpath + )) + if self.domain not in ('djangojs', 'django'): + continue + build_file = self.build_file_class(self, self.domain, translatable) + try: + build_file.preprocess() + except UnicodeDecodeError as e: + self.stdout.write( + 'UnicodeDecodeError: skipped file %s in %s (reason: %s)' % ( + translatable.file, translatable.dirpath, e, + ) + ) + continue + build_files.append(build_file) + + if self.domain == 'djangojs': + is_templatized = build_file.is_templatized + args = [ + 'xgettext', + '-d', self.domain, + '--language=%s' % ('C' if is_templatized else 'JavaScript',), + '--keyword=gettext_noop', + '--keyword=gettext_lazy', + '--keyword=ngettext_lazy:1,2', + '--keyword=pgettext:1c,2', + '--keyword=npgettext:1c,2,3', + '--output=-', + ] + elif self.domain == 'django': + args = [ + 'xgettext', + '-d', self.domain, + '--language=Python', + '--keyword=gettext_noop', + '--keyword=gettext_lazy', + '--keyword=ngettext_lazy:1,2', + '--keyword=ugettext_noop', + '--keyword=ugettext_lazy', + '--keyword=ungettext_lazy:1,2', + '--keyword=pgettext:1c,2', + '--keyword=npgettext:1c,2,3', + '--keyword=pgettext_lazy:1c,2', + '--keyword=npgettext_lazy:1c,2,3', + '--output=-', + ] + else: + return + + input_files = [bf.work_path for bf in build_files] + with NamedTemporaryFile(mode='w+') as input_files_list: + input_files_list.write('\n'.join(input_files)) + input_files_list.flush() + args.extend(['--files-from', input_files_list.name]) + args.extend(self.xgettext_options) + msgs, errors, status = gettext_popen_wrapper(args) + + if errors: + if status != STATUS_OK: + for build_file in build_files: + build_file.cleanup() + raise CommandError( + 'errors happened while running xgettext on %s\n%s' % + ('\n'.join(input_files), errors) + ) + elif self.verbosity > 0: + # Print warnings + self.stdout.write(errors) + + if msgs: + if locale_dir is NO_LOCALE_DIR: + file_path = os.path.normpath(build_files[0].path) + raise CommandError( + 'Unable to find a locale path to store translations for ' + 'file %s' % file_path + ) + for build_file in build_files: + msgs = build_file.postprocess_messages(msgs) + potfile = os.path.join(locale_dir, '%s.pot' % str(self.domain)) + write_pot_file(potfile, msgs) + + for build_file in build_files: + build_file.cleanup() + def write_po_file(self, potfile, locale): """ Creates or updates the PO file for self.domain and :param locale:. diff --git a/docs/releases/1.9.txt b/docs/releases/1.9.txt index 2eace132e7..9a3ae71ac9 100644 --- a/docs/releases/1.9.txt +++ b/docs/releases/1.9.txt @@ -380,6 +380,9 @@ Internationalization project and it will find all the app message files that were created by :djadmin:`makemessages`. +* :djadmin:`makemessages` now calls xgettext once per locale directory rather + than once per translatable file. This speeds up localization builds. + * :ttag:`blocktrans` supports assigning its output to a variable using ``asvar``. diff --git a/tests/i18n/commands/templates/plural.djtpl b/tests/i18n/commands/templates/plural.djtpl index b2f2a18750..2d1566f7eb 100644 --- a/tests/i18n/commands/templates/plural.djtpl +++ b/tests/i18n/commands/templates/plural.djtpl @@ -4,3 +4,5 @@ This file has a literal with plural forms. When processed first, makemessages shouldn't create a .po file with duplicate `Plural-Forms` headers {% endcomment %} {% blocktrans count number=3 %}{{ number }} Bar{% plural %}{{ number }} Bars{% endblocktrans %} + +{% trans 'First `trans`, then `blocktrans` with a plural' %} diff --git a/tests/i18n/commands/templates/test.html b/tests/i18n/commands/templates/test.html index 9901415496..ad64dd741f 100644 --- a/tests/i18n/commands/templates/test.html +++ b/tests/i18n/commands/templates/test.html @@ -80,3 +80,9 @@ continued here.{% endcomment %} should be trimmed. {% endblocktrans %} {% trans "I'm on line 82" %} + +{% blocktrans trimmed count counter=mylist|length %} +First `trans`, then `blocktrans` with a plural +{% plural %} +Plural for a `trans` and `blocktrans` collision case +{% endblocktrans %} diff --git a/tests/i18n/test_extraction.py b/tests/i18n/test_extraction.py index 62d9361b9f..9a665ec0de 100644 --- a/tests/i18n/test_extraction.py +++ b/tests/i18n/test_extraction.py @@ -80,6 +80,9 @@ class ExtractorTests(SimpleTestCase): def assertMsgId(self, msgid, haystack, use_quotes=True): return self._assertPoKeyword('msgid', msgid, haystack, use_quotes=use_quotes) + def assertMsgIdPlural(self, msgid, haystack, use_quotes=True): + return self._assertPoKeyword('msgid_plural', msgid, haystack, use_quotes=use_quotes) + def assertMsgStr(self, msgstr, haystack, use_quotes=True): return self._assertPoKeyword('msgstr', msgstr, haystack, use_quotes=use_quotes) @@ -525,6 +528,21 @@ class CopyPluralFormsExtractorTests(ExtractorTests): found = re.findall(r'^(?P"Plural-Forms.+?\\n")\s*$', po_contents, re.MULTILINE | re.DOTALL) self.assertEqual(1, len(found)) + def test_trans_and_plural_blocktrans_collision(self): + """ + Ensures a correct workaround for the gettext bug when handling a literal + found inside a {% trans %} tag and also in another file inside a + {% blocktrans %} with a plural (#17375). + """ + os.chdir(self.test_dir) + management.call_command('makemessages', locale=[LOCALE], extensions=['html', 'djtpl'], verbosity=0) + self.assertTrue(os.path.exists(self.PO_FILE)) + with open(self.PO_FILE, 'r') as fp: + po_contents = force_text(fp.read()) + self.assertNotIn("#-#-#-#-# django.pot (PACKAGE VERSION) #-#-#-#-#\\n", po_contents) + self.assertMsgId('First `trans`, then `blocktrans` with a plural', po_contents) + self.assertMsgIdPlural('Plural for a `trans` and `blocktrans` collision case', po_contents) + class NoWrapExtractorTests(ExtractorTests):