Fixed #17375 -- Changed makemessages to use xgettext with --files-from

Changed the way makemessages invokes xgettext from one call per translatable file to one call per locale directory (using --files-from). This allows to avoid https://savannah.gnu.org/bugs/index.php?35027 and, as a positive side effect, speeds up localization build.
2025-03-12 02:12:38 +00:00 · 2015-08-25 02:28:23 +03:00 · 2015-08-25 02:28:23 +03:00 · e75882332c
commit e75882332c
parent 7ac0cd445e
5 changed files with 226 additions and 104 deletions
--- a/django/core/management/commands/makemessages.py
+++ b/django/core/management/commands/makemessages.py
@ -11,6 +11,7 @@ from itertools import dropwhile
 import django
 from django.conf import settings
 from django.core.files.temp import NamedTemporaryFile
 from django.core.management.base import BaseCommand, CommandError
 from django.core.management.utils import (
    find_command, handle_extensions, popen_wrapper,
@ -74,103 +75,96 @@ class TranslatableFile(object):
    def path(self):
        return os.path.join(self.dirpath, self.file)
    def process(self, command, domain):
        """
        Extract translatable literals from self.file for :param domain:,
        creating or updating the POT file.
-        Uses the xgettext GNU gettext utility.
+class BuildFile(object):
    """
    Represents the state of a translatable file during the build process.
    """
    def __init__(self, command, domain, translatable):
        self.command = command
        self.domain = domain
        self.translatable = translatable
    @cached_property
    def is_templatized(self):
        if self.domain == 'djangojs':
            return self.command.gettext_version < (0, 18, 3)
        elif self.domain == 'django':
            file_ext = os.path.splitext(self.translatable.file)[1]
            return file_ext != '.py'
        return False
    @cached_property
    def path(self):
        return self.translatable.path
    @cached_property
    def work_path(self):
        """
        Path to a file which is being fed into GNU gettext pipeline. This may
        be either a translatable or its preprocessed version.
        """
        if not self.is_templatized:
            return self.path
        extension = {
            'djangojs': 'c',
            'django': 'py',
        }.get(self.domain)
        filename = '%s.%s' % (self.translatable.file, extension)
        return os.path.join(self.translatable.dirpath, filename)
    def preprocess(self):
        """
        Preprocess (if necessary) a translatable file before passing it to
        xgettext GNU gettext utility.
        """
        from django.utils.translation import templatize
-        if command.verbosity > 1:
+        if not self.is_templatized:
            command.stdout.write('processing file %s in %s\n' % (self.file, self.dirpath))
        file_ext = os.path.splitext(self.file)[1]
        if domain == 'djangojs':
            orig_file = os.path.join(self.dirpath, self.file)
            work_file = orig_file
            is_templatized = command.gettext_version < (0, 18, 3)
            if is_templatized:
                with io.open(orig_file, 'r', encoding=settings.FILE_CHARSET) as fp:
                    src_data = fp.read()
                src_data = prepare_js_for_gettext(src_data)
                work_file = os.path.join(self.dirpath, '%s.c' % self.file)
                with io.open(work_file, "w", encoding='utf-8') as fp:
                    fp.write(src_data)
            args = [
                'xgettext',
                '-d', domain,
                '--language=%s' % ('C' if is_templatized else 'JavaScript',),
                '--keyword=gettext_noop',
                '--keyword=gettext_lazy',
                '--keyword=ngettext_lazy:1,2',
                '--keyword=pgettext:1c,2',
                '--keyword=npgettext:1c,2,3',
                '--output=-'
            ] + command.xgettext_options
            args.append(work_file)
        elif domain == 'django':
            orig_file = os.path.join(self.dirpath, self.file)
            work_file = orig_file
            is_templatized = file_ext != '.py'
            if is_templatized:
                with io.open(orig_file, encoding=settings.FILE_CHARSET) as fp:
                    src_data = fp.read()
                content = templatize(src_data, orig_file[2:])
                work_file = os.path.join(self.dirpath, '%s.py' % self.file)
                with io.open(work_file, "w", encoding='utf-8') as fp:
                    fp.write(content)
            args = [
                'xgettext',
                '-d', domain,
                '--language=Python',
                '--keyword=gettext_noop',
                '--keyword=gettext_lazy',
                '--keyword=ngettext_lazy:1,2',
                '--keyword=ugettext_noop',
                '--keyword=ugettext_lazy',
                '--keyword=ungettext_lazy:1,2',
                '--keyword=pgettext:1c,2',
                '--keyword=npgettext:1c,2,3',
                '--keyword=pgettext_lazy:1c,2',
                '--keyword=npgettext_lazy:1c,2,3',
                '--output=-'
            ] + command.xgettext_options
            args.append(work_file)
        else:
            return
        msgs, errors, status = gettext_popen_wrapper(args)
        if errors:
            if status != STATUS_OK:
                if is_templatized:
                    os.unlink(work_file)
                raise CommandError(
                    "errors happened while running xgettext on %s\n%s" %
                    (self.file, errors))
            elif command.verbosity > 0:
                # Print warnings
                command.stdout.write(errors)
        if msgs:
            # Write/append messages to pot file
            if self.locale_dir is NO_LOCALE_DIR:
                file_path = os.path.normpath(os.path.join(self.dirpath, self.file))
                raise CommandError(
                    "Unable to find a locale path to store translations for file %s" % file_path)
            potfile = os.path.join(self.locale_dir, '%s.pot' % str(domain))
            if is_templatized:
                # Remove '.py' suffix
                if os.name == 'nt':
                    # Preserve '.\' prefix on Windows to respect gettext behavior
                    old = '#: ' + work_file
                    new = '#: ' + orig_file
                else:
                    old = '#: ' + work_file[2:]
                    new = '#: ' + orig_file[2:]
                msgs = msgs.replace(old, new)
            write_pot_file(potfile, msgs)
-        if is_templatized:
+        with io.open(self.path, 'r', encoding=settings.FILE_CHARSET) as fp:
-            os.unlink(work_file)
+            src_data = fp.read()
        if self.domain == 'djangojs':
            content = prepare_js_for_gettext(src_data)
        elif self.domain == 'django':
            content = templatize(src_data, self.path[2:])
        with io.open(self.work_path, 'w', encoding='utf-8') as fp:
            fp.write(content)
    def postprocess_messages(self, msgs):
        """
        Postprocess messages generated by xgettext GNU gettext utility.
        Transform paths as if these messages were generated from original
        translatable files rather than from preprocessed versions.
        """
        if not self.is_templatized:
            return msgs
        # Remove '.py' suffix
        if os.name == 'nt':
            # Preserve '.\' prefix on Windows to respect gettext behavior
            old = '#: ' + self.work_path
            new = '#: ' + self.path
        else:
            old = '#: ' + self.work_path[2:]
            new = '#: ' + self.path[2:]
        return msgs.replace(old, new)
    def cleanup(self):
        """
        Remove a preprocessed copy of a translatable file (if any).
        """
        if self.is_templatized:
            # This check is needed for the case of a symlinked file and its
            # source being processed inside a single group (locale dir);
            # removing either of those two removes both.
            if os.path.exists(self.work_path):
                os.unlink(self.work_path)
 def write_pot_file(potfile, msgs):
@ -194,6 +188,9 @@ class Command(BaseCommand):
 "applications) directory.\n\nYou must run this command with one of either the "
 "--locale, --exclude or --all options.")
    translatable_file_class = TranslatableFile
    build_file_class = BuildFile
    requires_system_checks = False
    leave_locale_alone = True
@ -353,18 +350,7 @@ class Command(BaseCommand):
        """
        file_list = self.find_files(".")
        self.remove_potfiles()
-        for f in file_list:
+        self.process_files(file_list)
            try:
                f.process(self, self.domain)
            except UnicodeDecodeError as e:
                self.stdout.write(
                    "UnicodeDecodeError: skipped file %s in %s (reason: %s)" % (
                        f.file,
                        f.dirpath,
                        e,
                    )
                )
        potfiles = []
        for path in self.locale_paths:
            potfile = os.path.join(path, '%s.pot' % str(self.domain))
@ -443,9 +429,116 @@ class Command(BaseCommand):
                        locale_dir = self.default_locale_path
                    if not locale_dir:
                        locale_dir = NO_LOCALE_DIR
-                    all_files.append(TranslatableFile(dirpath, filename, locale_dir))
+                    all_files.append(self.translatable_file_class(dirpath, filename, locale_dir))
        return sorted(all_files)
    def process_files(self, file_list):
        """
        Group translatable files by locale directory and run pot file build
        process for each group.
        """
        file_groups = {}
        for translatable in file_list:
            file_group = file_groups.setdefault(translatable.locale_dir, [])
            file_group.append(translatable)
        for locale_dir, files in file_groups.items():
            self.process_locale_dir(locale_dir, files)
    def process_locale_dir(self, locale_dir, files):
        """
        Extract translatable literals from the specified files, creating or
        updating the POT file for a given locale directory.
        Uses the xgettext GNU gettext utility.
        """
        build_files = []
        for translatable in files:
            if self.verbosity > 1:
                self.stdout.write('processing file %s in %s\n' % (
                    translatable.file, translatable.dirpath
                ))
            if self.domain not in ('djangojs', 'django'):
                continue
            build_file = self.build_file_class(self, self.domain, translatable)
            try:
                build_file.preprocess()
            except UnicodeDecodeError as e:
                self.stdout.write(
                    'UnicodeDecodeError: skipped file %s in %s (reason: %s)' % (
                        translatable.file, translatable.dirpath, e,
                    )
                )
                continue
            build_files.append(build_file)
        if self.domain == 'djangojs':
            is_templatized = build_file.is_templatized
            args = [
                'xgettext',
                '-d', self.domain,
                '--language=%s' % ('C' if is_templatized else 'JavaScript',),
                '--keyword=gettext_noop',
                '--keyword=gettext_lazy',
                '--keyword=ngettext_lazy:1,2',
                '--keyword=pgettext:1c,2',
                '--keyword=npgettext:1c,2,3',
                '--output=-',
            ]
        elif self.domain == 'django':
            args = [
                'xgettext',
                '-d', self.domain,
                '--language=Python',
                '--keyword=gettext_noop',
                '--keyword=gettext_lazy',
                '--keyword=ngettext_lazy:1,2',
                '--keyword=ugettext_noop',
                '--keyword=ugettext_lazy',
                '--keyword=ungettext_lazy:1,2',
                '--keyword=pgettext:1c,2',
                '--keyword=npgettext:1c,2,3',
                '--keyword=pgettext_lazy:1c,2',
                '--keyword=npgettext_lazy:1c,2,3',
                '--output=-',
            ]
        else:
            return
        input_files = [bf.work_path for bf in build_files]
        with NamedTemporaryFile(mode='w+') as input_files_list:
            input_files_list.write('\n'.join(input_files))
            input_files_list.flush()
            args.extend(['--files-from', input_files_list.name])
            args.extend(self.xgettext_options)
            msgs, errors, status = gettext_popen_wrapper(args)
        if errors:
            if status != STATUS_OK:
                for build_file in build_files:
                    build_file.cleanup()
                raise CommandError(
                    'errors happened while running xgettext on %s\n%s' %
                    ('\n'.join(input_files), errors)
                )
            elif self.verbosity > 0:
                # Print warnings
                self.stdout.write(errors)
        if msgs:
            if locale_dir is NO_LOCALE_DIR:
                file_path = os.path.normpath(build_files[0].path)
                raise CommandError(
                    'Unable to find a locale path to store translations for '
                    'file %s' % file_path
                )
            for build_file in build_files:
                msgs = build_file.postprocess_messages(msgs)
            potfile = os.path.join(locale_dir, '%s.pot' % str(self.domain))
            write_pot_file(potfile, msgs)
        for build_file in build_files:
            build_file.cleanup()
    def write_po_file(self, potfile, locale):
        """
        Creates or updates the PO file for self.domain and :param locale:.
--- a/docs/releases/1.9.txt
+++ b/docs/releases/1.9.txt
@ -380,6 +380,9 @@ Internationalization
  project and it will find all the app message files that were created by
  :djadmin:`makemessages`.
 * :djadmin:`makemessages` now calls xgettext once per locale directory rather
  than once per translatable file. This speeds up localization builds.
 * :ttag:`blocktrans` supports assigning its output to a variable using
  ``asvar``.
--- a/tests/i18n/commands/templates/plural.djtpl
+++ b/tests/i18n/commands/templates/plural.djtpl
@ -4,3 +4,5 @@ This file has a literal with plural forms. When processed first, makemessages
 shouldn't create a .po file with duplicate `Plural-Forms` headers
 {% endcomment %}
 {% blocktrans count number=3 %}{{ number }} Bar{% plural %}{{ number }} Bars{% endblocktrans %}
 {% trans 'First `trans`, then `blocktrans` with a plural' %}
--- a/tests/i18n/commands/templates/test.html
+++ b/tests/i18n/commands/templates/test.html
@ -80,3 +80,9 @@ continued here.{% endcomment %}
  should be trimmed.
 {% endblocktrans %}
 {% trans "I'm on line 82" %}
 {% blocktrans trimmed count counter=mylist|length %}
 First `trans`, then `blocktrans` with a plural
 {% plural %}
 Plural for a `trans` and `blocktrans` collision case
 {% endblocktrans %}
--- a/tests/i18n/test_extraction.py
+++ b/tests/i18n/test_extraction.py
@ -80,6 +80,9 @@ class ExtractorTests(SimpleTestCase):
    def assertMsgId(self, msgid, haystack, use_quotes=True):
        return self._assertPoKeyword('msgid', msgid, haystack, use_quotes=use_quotes)
    def assertMsgIdPlural(self, msgid, haystack, use_quotes=True):
        return self._assertPoKeyword('msgid_plural', msgid, haystack, use_quotes=use_quotes)
    def assertMsgStr(self, msgstr, haystack, use_quotes=True):
        return self._assertPoKeyword('msgstr', msgstr, haystack, use_quotes=use_quotes)
@ -525,6 +528,21 @@ class CopyPluralFormsExtractorTests(ExtractorTests):
            found = re.findall(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', po_contents, re.MULTILINE | re.DOTALL)
            self.assertEqual(1, len(found))
    def test_trans_and_plural_blocktrans_collision(self):
        """
        Ensures a correct workaround for the gettext bug when handling a literal
        found inside a {% trans %} tag and also in another file inside a
        {% blocktrans %} with a plural (#17375).
        """
        os.chdir(self.test_dir)
        management.call_command('makemessages', locale=[LOCALE], extensions=['html', 'djtpl'], verbosity=0)
        self.assertTrue(os.path.exists(self.PO_FILE))
        with open(self.PO_FILE, 'r') as fp:
            po_contents = force_text(fp.read())
            self.assertNotIn("#-#-#-#-#  django.pot (PACKAGE VERSION)  #-#-#-#-#\\n", po_contents)
            self.assertMsgId('First `trans`, then `blocktrans` with a plural', po_contents)
            self.assertMsgIdPlural('Plural for a `trans` and `blocktrans` collision case', po_contents)
 class NoWrapExtractorTests(ExtractorTests):