1
0
mirror of https://github.com/django/django.git synced 2025-03-12 02:12:38 +00:00

Fixed #17375 -- Changed makemessages to use xgettext with --files-from

Changed the way makemessages invokes xgettext from one call per
translatable file to one call per locale directory (using --files-from).
This allows to avoid https://savannah.gnu.org/bugs/index.php?35027 and,
as a positive side effect, speeds up localization build.
This commit is contained in:
Sergey Kolosov 2015-08-25 02:28:23 +03:00 committed by Tim Graham
parent 7ac0cd445e
commit e75882332c
5 changed files with 226 additions and 104 deletions

View File

@ -11,6 +11,7 @@ from itertools import dropwhile
import django import django
from django.conf import settings from django.conf import settings
from django.core.files.temp import NamedTemporaryFile
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
from django.core.management.utils import ( from django.core.management.utils import (
find_command, handle_extensions, popen_wrapper, find_command, handle_extensions, popen_wrapper,
@ -74,103 +75,96 @@ class TranslatableFile(object):
def path(self): def path(self):
return os.path.join(self.dirpath, self.file) return os.path.join(self.dirpath, self.file)
def process(self, command, domain):
"""
Extract translatable literals from self.file for :param domain:,
creating or updating the POT file.
Uses the xgettext GNU gettext utility. class BuildFile(object):
"""
Represents the state of a translatable file during the build process.
"""
def __init__(self, command, domain, translatable):
self.command = command
self.domain = domain
self.translatable = translatable
@cached_property
def is_templatized(self):
if self.domain == 'djangojs':
return self.command.gettext_version < (0, 18, 3)
elif self.domain == 'django':
file_ext = os.path.splitext(self.translatable.file)[1]
return file_ext != '.py'
return False
@cached_property
def path(self):
return self.translatable.path
@cached_property
def work_path(self):
"""
Path to a file which is being fed into GNU gettext pipeline. This may
be either a translatable or its preprocessed version.
"""
if not self.is_templatized:
return self.path
extension = {
'djangojs': 'c',
'django': 'py',
}.get(self.domain)
filename = '%s.%s' % (self.translatable.file, extension)
return os.path.join(self.translatable.dirpath, filename)
def preprocess(self):
"""
Preprocess (if necessary) a translatable file before passing it to
xgettext GNU gettext utility.
""" """
from django.utils.translation import templatize from django.utils.translation import templatize
if command.verbosity > 1: if not self.is_templatized:
command.stdout.write('processing file %s in %s\n' % (self.file, self.dirpath))
file_ext = os.path.splitext(self.file)[1]
if domain == 'djangojs':
orig_file = os.path.join(self.dirpath, self.file)
work_file = orig_file
is_templatized = command.gettext_version < (0, 18, 3)
if is_templatized:
with io.open(orig_file, 'r', encoding=settings.FILE_CHARSET) as fp:
src_data = fp.read()
src_data = prepare_js_for_gettext(src_data)
work_file = os.path.join(self.dirpath, '%s.c' % self.file)
with io.open(work_file, "w", encoding='utf-8') as fp:
fp.write(src_data)
args = [
'xgettext',
'-d', domain,
'--language=%s' % ('C' if is_templatized else 'JavaScript',),
'--keyword=gettext_noop',
'--keyword=gettext_lazy',
'--keyword=ngettext_lazy:1,2',
'--keyword=pgettext:1c,2',
'--keyword=npgettext:1c,2,3',
'--output=-'
] + command.xgettext_options
args.append(work_file)
elif domain == 'django':
orig_file = os.path.join(self.dirpath, self.file)
work_file = orig_file
is_templatized = file_ext != '.py'
if is_templatized:
with io.open(orig_file, encoding=settings.FILE_CHARSET) as fp:
src_data = fp.read()
content = templatize(src_data, orig_file[2:])
work_file = os.path.join(self.dirpath, '%s.py' % self.file)
with io.open(work_file, "w", encoding='utf-8') as fp:
fp.write(content)
args = [
'xgettext',
'-d', domain,
'--language=Python',
'--keyword=gettext_noop',
'--keyword=gettext_lazy',
'--keyword=ngettext_lazy:1,2',
'--keyword=ugettext_noop',
'--keyword=ugettext_lazy',
'--keyword=ungettext_lazy:1,2',
'--keyword=pgettext:1c,2',
'--keyword=npgettext:1c,2,3',
'--keyword=pgettext_lazy:1c,2',
'--keyword=npgettext_lazy:1c,2,3',
'--output=-'
] + command.xgettext_options
args.append(work_file)
else:
return return
msgs, errors, status = gettext_popen_wrapper(args)
if errors:
if status != STATUS_OK:
if is_templatized:
os.unlink(work_file)
raise CommandError(
"errors happened while running xgettext on %s\n%s" %
(self.file, errors))
elif command.verbosity > 0:
# Print warnings
command.stdout.write(errors)
if msgs:
# Write/append messages to pot file
if self.locale_dir is NO_LOCALE_DIR:
file_path = os.path.normpath(os.path.join(self.dirpath, self.file))
raise CommandError(
"Unable to find a locale path to store translations for file %s" % file_path)
potfile = os.path.join(self.locale_dir, '%s.pot' % str(domain))
if is_templatized:
# Remove '.py' suffix
if os.name == 'nt':
# Preserve '.\' prefix on Windows to respect gettext behavior
old = '#: ' + work_file
new = '#: ' + orig_file
else:
old = '#: ' + work_file[2:]
new = '#: ' + orig_file[2:]
msgs = msgs.replace(old, new)
write_pot_file(potfile, msgs)
if is_templatized: with io.open(self.path, 'r', encoding=settings.FILE_CHARSET) as fp:
os.unlink(work_file) src_data = fp.read()
if self.domain == 'djangojs':
content = prepare_js_for_gettext(src_data)
elif self.domain == 'django':
content = templatize(src_data, self.path[2:])
with io.open(self.work_path, 'w', encoding='utf-8') as fp:
fp.write(content)
def postprocess_messages(self, msgs):
"""
Postprocess messages generated by xgettext GNU gettext utility.
Transform paths as if these messages were generated from original
translatable files rather than from preprocessed versions.
"""
if not self.is_templatized:
return msgs
# Remove '.py' suffix
if os.name == 'nt':
# Preserve '.\' prefix on Windows to respect gettext behavior
old = '#: ' + self.work_path
new = '#: ' + self.path
else:
old = '#: ' + self.work_path[2:]
new = '#: ' + self.path[2:]
return msgs.replace(old, new)
def cleanup(self):
"""
Remove a preprocessed copy of a translatable file (if any).
"""
if self.is_templatized:
# This check is needed for the case of a symlinked file and its
# source being processed inside a single group (locale dir);
# removing either of those two removes both.
if os.path.exists(self.work_path):
os.unlink(self.work_path)
def write_pot_file(potfile, msgs): def write_pot_file(potfile, msgs):
@ -194,6 +188,9 @@ class Command(BaseCommand):
"applications) directory.\n\nYou must run this command with one of either the " "applications) directory.\n\nYou must run this command with one of either the "
"--locale, --exclude or --all options.") "--locale, --exclude or --all options.")
translatable_file_class = TranslatableFile
build_file_class = BuildFile
requires_system_checks = False requires_system_checks = False
leave_locale_alone = True leave_locale_alone = True
@ -353,18 +350,7 @@ class Command(BaseCommand):
""" """
file_list = self.find_files(".") file_list = self.find_files(".")
self.remove_potfiles() self.remove_potfiles()
for f in file_list: self.process_files(file_list)
try:
f.process(self, self.domain)
except UnicodeDecodeError as e:
self.stdout.write(
"UnicodeDecodeError: skipped file %s in %s (reason: %s)" % (
f.file,
f.dirpath,
e,
)
)
potfiles = [] potfiles = []
for path in self.locale_paths: for path in self.locale_paths:
potfile = os.path.join(path, '%s.pot' % str(self.domain)) potfile = os.path.join(path, '%s.pot' % str(self.domain))
@ -443,9 +429,116 @@ class Command(BaseCommand):
locale_dir = self.default_locale_path locale_dir = self.default_locale_path
if not locale_dir: if not locale_dir:
locale_dir = NO_LOCALE_DIR locale_dir = NO_LOCALE_DIR
all_files.append(TranslatableFile(dirpath, filename, locale_dir)) all_files.append(self.translatable_file_class(dirpath, filename, locale_dir))
return sorted(all_files) return sorted(all_files)
def process_files(self, file_list):
"""
Group translatable files by locale directory and run pot file build
process for each group.
"""
file_groups = {}
for translatable in file_list:
file_group = file_groups.setdefault(translatable.locale_dir, [])
file_group.append(translatable)
for locale_dir, files in file_groups.items():
self.process_locale_dir(locale_dir, files)
def process_locale_dir(self, locale_dir, files):
"""
Extract translatable literals from the specified files, creating or
updating the POT file for a given locale directory.
Uses the xgettext GNU gettext utility.
"""
build_files = []
for translatable in files:
if self.verbosity > 1:
self.stdout.write('processing file %s in %s\n' % (
translatable.file, translatable.dirpath
))
if self.domain not in ('djangojs', 'django'):
continue
build_file = self.build_file_class(self, self.domain, translatable)
try:
build_file.preprocess()
except UnicodeDecodeError as e:
self.stdout.write(
'UnicodeDecodeError: skipped file %s in %s (reason: %s)' % (
translatable.file, translatable.dirpath, e,
)
)
continue
build_files.append(build_file)
if self.domain == 'djangojs':
is_templatized = build_file.is_templatized
args = [
'xgettext',
'-d', self.domain,
'--language=%s' % ('C' if is_templatized else 'JavaScript',),
'--keyword=gettext_noop',
'--keyword=gettext_lazy',
'--keyword=ngettext_lazy:1,2',
'--keyword=pgettext:1c,2',
'--keyword=npgettext:1c,2,3',
'--output=-',
]
elif self.domain == 'django':
args = [
'xgettext',
'-d', self.domain,
'--language=Python',
'--keyword=gettext_noop',
'--keyword=gettext_lazy',
'--keyword=ngettext_lazy:1,2',
'--keyword=ugettext_noop',
'--keyword=ugettext_lazy',
'--keyword=ungettext_lazy:1,2',
'--keyword=pgettext:1c,2',
'--keyword=npgettext:1c,2,3',
'--keyword=pgettext_lazy:1c,2',
'--keyword=npgettext_lazy:1c,2,3',
'--output=-',
]
else:
return
input_files = [bf.work_path for bf in build_files]
with NamedTemporaryFile(mode='w+') as input_files_list:
input_files_list.write('\n'.join(input_files))
input_files_list.flush()
args.extend(['--files-from', input_files_list.name])
args.extend(self.xgettext_options)
msgs, errors, status = gettext_popen_wrapper(args)
if errors:
if status != STATUS_OK:
for build_file in build_files:
build_file.cleanup()
raise CommandError(
'errors happened while running xgettext on %s\n%s' %
('\n'.join(input_files), errors)
)
elif self.verbosity > 0:
# Print warnings
self.stdout.write(errors)
if msgs:
if locale_dir is NO_LOCALE_DIR:
file_path = os.path.normpath(build_files[0].path)
raise CommandError(
'Unable to find a locale path to store translations for '
'file %s' % file_path
)
for build_file in build_files:
msgs = build_file.postprocess_messages(msgs)
potfile = os.path.join(locale_dir, '%s.pot' % str(self.domain))
write_pot_file(potfile, msgs)
for build_file in build_files:
build_file.cleanup()
def write_po_file(self, potfile, locale): def write_po_file(self, potfile, locale):
""" """
Creates or updates the PO file for self.domain and :param locale:. Creates or updates the PO file for self.domain and :param locale:.

View File

@ -380,6 +380,9 @@ Internationalization
project and it will find all the app message files that were created by project and it will find all the app message files that were created by
:djadmin:`makemessages`. :djadmin:`makemessages`.
* :djadmin:`makemessages` now calls xgettext once per locale directory rather
than once per translatable file. This speeds up localization builds.
* :ttag:`blocktrans` supports assigning its output to a variable using * :ttag:`blocktrans` supports assigning its output to a variable using
``asvar``. ``asvar``.

View File

@ -4,3 +4,5 @@ This file has a literal with plural forms. When processed first, makemessages
shouldn't create a .po file with duplicate `Plural-Forms` headers shouldn't create a .po file with duplicate `Plural-Forms` headers
{% endcomment %} {% endcomment %}
{% blocktrans count number=3 %}{{ number }} Bar{% plural %}{{ number }} Bars{% endblocktrans %} {% blocktrans count number=3 %}{{ number }} Bar{% plural %}{{ number }} Bars{% endblocktrans %}
{% trans 'First `trans`, then `blocktrans` with a plural' %}

View File

@ -80,3 +80,9 @@ continued here.{% endcomment %}
should be trimmed. should be trimmed.
{% endblocktrans %} {% endblocktrans %}
{% trans "I'm on line 82" %} {% trans "I'm on line 82" %}
{% blocktrans trimmed count counter=mylist|length %}
First `trans`, then `blocktrans` with a plural
{% plural %}
Plural for a `trans` and `blocktrans` collision case
{% endblocktrans %}

View File

@ -80,6 +80,9 @@ class ExtractorTests(SimpleTestCase):
def assertMsgId(self, msgid, haystack, use_quotes=True): def assertMsgId(self, msgid, haystack, use_quotes=True):
return self._assertPoKeyword('msgid', msgid, haystack, use_quotes=use_quotes) return self._assertPoKeyword('msgid', msgid, haystack, use_quotes=use_quotes)
def assertMsgIdPlural(self, msgid, haystack, use_quotes=True):
return self._assertPoKeyword('msgid_plural', msgid, haystack, use_quotes=use_quotes)
def assertMsgStr(self, msgstr, haystack, use_quotes=True): def assertMsgStr(self, msgstr, haystack, use_quotes=True):
return self._assertPoKeyword('msgstr', msgstr, haystack, use_quotes=use_quotes) return self._assertPoKeyword('msgstr', msgstr, haystack, use_quotes=use_quotes)
@ -525,6 +528,21 @@ class CopyPluralFormsExtractorTests(ExtractorTests):
found = re.findall(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', po_contents, re.MULTILINE | re.DOTALL) found = re.findall(r'^(?P<value>"Plural-Forms.+?\\n")\s*$', po_contents, re.MULTILINE | re.DOTALL)
self.assertEqual(1, len(found)) self.assertEqual(1, len(found))
def test_trans_and_plural_blocktrans_collision(self):
"""
Ensures a correct workaround for the gettext bug when handling a literal
found inside a {% trans %} tag and also in another file inside a
{% blocktrans %} with a plural (#17375).
"""
os.chdir(self.test_dir)
management.call_command('makemessages', locale=[LOCALE], extensions=['html', 'djtpl'], verbosity=0)
self.assertTrue(os.path.exists(self.PO_FILE))
with open(self.PO_FILE, 'r') as fp:
po_contents = force_text(fp.read())
self.assertNotIn("#-#-#-#-# django.pot (PACKAGE VERSION) #-#-#-#-#\\n", po_contents)
self.assertMsgId('First `trans`, then `blocktrans` with a plural', po_contents)
self.assertMsgIdPlural('Plural for a `trans` and `blocktrans` collision case', po_contents)
class NoWrapExtractorTests(ExtractorTests): class NoWrapExtractorTests(ExtractorTests):