-
-
Notifications
You must be signed in to change notification settings - Fork 32.5k
gh-137353: Add t-string support to gettext + pygettext #137354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,9 +43,13 @@ | |
# you'll need to study the GNU gettext code to do this. | ||
|
||
|
||
import ast | ||
import operator | ||
import os | ||
import sys | ||
from functools import cache | ||
from string.templatelib import Interpolation, Template, convert | ||
from typing import Any | ||
|
||
|
||
__all__ = ['NullTranslations', 'GNUTranslations', 'Catalog', | ||
|
@@ -290,11 +294,23 @@ def add_fallback(self, fallback): | |
def gettext(self, message):
    """Return *message* untranslated, or delegate to the fallback.

    When a fallback is set, the call is forwarded to it unchanged.
    Otherwise a plain string is returned as is, while a t-string
    (``Template``) is turned into a format string and a value mapping by
    ``_template_to_format`` and rendered with ``str.format``.
    """
    fallback = self._fallback
    if fallback:
        return fallback.gettext(message)
    if not isinstance(message, Template):
        return message
    fmt, values = _template_to_format(message)
    return fmt.format(**values)
|
||
def ngettext(self, msgid1, msgid2, n): | ||
if self._fallback: | ||
return self._fallback.ngettext(msgid1, msgid2, n) | ||
msgid1_is_template = isinstance(msgid1, Template) | ||
msgid2_is_template = isinstance(msgid2, Template) | ||
if msgid1_is_template and msgid2_is_template: | ||
message, values = _template_to_format( | ||
msgid1 if n == 1 else msgid2 | ||
) | ||
return message.format(**values) | ||
elif msgid1_is_template or msgid2_is_template: | ||
raise TypeError('msgids cannot mix strings and t-strings') | ||
n = _as_int2(n) | ||
if n == 1: | ||
return msgid1 | ||
|
@@ -304,11 +320,23 @@ def ngettext(self, msgid1, msgid2, n): | |
def pgettext(self, context, message):
    """Return *message* untranslated, or delegate to the fallback.

    When a fallback is set, the call (including *context*) is forwarded
    to it unchanged.  Otherwise a plain string is returned as is, while a
    t-string (``Template``) is turned into a format string and a value
    mapping by ``_template_to_format`` and rendered with ``str.format``.
    """
    fallback = self._fallback
    if fallback:
        return fallback.pgettext(context, message)
    if not isinstance(message, Template):
        return message
    fmt, values = _template_to_format(message)
    return fmt.format(**values)
|
||
def npgettext(self, context, msgid1, msgid2, n): | ||
if self._fallback: | ||
return self._fallback.npgettext(context, msgid1, msgid2, n) | ||
msgid1_is_template = isinstance(msgid1, Template) | ||
msgid2_is_template = isinstance(msgid2, Template) | ||
if msgid1_is_template and msgid2_is_template: | ||
message, values = _template_to_format( | ||
msgid1 if n == 1 else msgid2 | ||
) | ||
return message.format(**values) | ||
elif msgid1_is_template or msgid2_is_template: | ||
raise TypeError('msgids cannot mix strings and t-strings') | ||
n = _as_int2(n) | ||
if n == 1: | ||
return msgid1 | ||
|
@@ -437,50 +465,104 @@ def _parse(self, fp): | |
|
||
def gettext(self, message):
    """Look up *message* in the catalog and return its translation.

    *message* may be a plain string or a t-string (``Template``).  A
    template is first converted by ``_template_to_format`` into a format
    string (used as the catalog key) and a mapping of placeholder values;
    whichever string is finally selected is then rendered with those
    values.

    Lookup order: the message itself, then the ``(message, plural(1))``
    key, then the fallback (if any), and finally the message itself.
    """
    missing = object()
    # Keep the caller's object: the fallback must receive the Template,
    # not the derived format string, or the interpolated values are lost.
    orig_message = message
    t_values = None
    if isinstance(message, Template):
        message, t_values = _template_to_format(message)
    tmsg = self._catalog.get(message, missing)
    if tmsg is missing:
        tmsg = self._catalog.get((message, self.plural(1)), missing)
    if tmsg is missing:
        if self._fallback:
            return self._fallback.gettext(orig_message)
        tmsg = message
    if t_values is not None:
        return tmsg.format(**t_values)
    return tmsg
|
||
def ngettext(self, msgid1, msgid2, n):
    """Return the translation of the plural form selected by *n*.

    *msgid1* and *msgid2* must both be plain strings or both be t-strings
    (``Template``); mixing them raises ``TypeError``.  Templates are
    converted by ``_template_to_format`` into format strings (``msgid1``
    is the catalog key) and placeholder value mappings.

    If the catalog has no entry, the fallback (if any) is called with the
    caller's original arguments; otherwise ``msgid1`` is used when
    ``n == 1`` and ``msgid2`` in all other cases.
    """
    # Keep the caller's objects so a fallback receives Templates, not the
    # derived format strings.
    orig_msgid1 = msgid1
    orig_msgid2 = msgid2
    msgid1_is_template = isinstance(msgid1, Template)
    msgid2_is_template = isinstance(msgid2, Template)
    t_values1 = t_values2 = None
    if msgid1_is_template and msgid2_is_template:
        msgid1, t_values1 = _template_to_format(msgid1)
        msgid2, t_values2 = _template_to_format(msgid2)
    elif msgid1_is_template or msgid2_is_template:
        raise TypeError('msgids cannot mix strings and t-strings')
    plural = self.plural(n)
    try:
        tmsg = self._catalog[(msgid1, plural)]
    except KeyError:
        if self._fallback:
            return self._fallback.ngettext(orig_msgid1, orig_msgid2, n)
        # Untranslated: each msgid is rendered with its own values.
        if n == 1:
            tmsg, t_values = msgid1, t_values1
        else:
            tmsg, t_values = msgid2, t_values2
    else:
        if t_values1 is None:
            t_values = None
        elif plural:
            # A translated form may use a placeholder that occurs in only
            # one of the two msgids, so offer the values of both; on a
            # name clash the msgid matching the selected form wins.
            t_values = {**t_values1, **t_values2}
        else:
            t_values = {**t_values2, **t_values1}
    if t_values is not None:
        return tmsg.format(**t_values)
    return tmsg
|
||
def pgettext(self, context, message):
    """Look up *message* within *context* and return its translation.

    *message* may be a plain string or a t-string (``Template``).  A
    template is first converted by ``_template_to_format`` into a format
    string and a mapping of placeholder values; the catalog key is built
    from *context* and that string via ``self.CONTEXT``.

    Lookup order: the contextual key, then ``(key, plural(1))``, then the
    fallback (if any), and finally the message itself.
    """
    # Keep the caller's object: the fallback must receive the Template,
    # not the derived format string, or the interpolated values are lost.
    orig_message = message
    t_values = None
    if isinstance(message, Template):
        message, t_values = _template_to_format(message)
    ctxt_msg_id = self.CONTEXT % (context, message)
    missing = object()
    tmsg = self._catalog.get(ctxt_msg_id, missing)
    if tmsg is missing:
        tmsg = self._catalog.get((ctxt_msg_id, self.plural(1)), missing)
    if tmsg is missing:
        if self._fallback:
            return self._fallback.pgettext(context, orig_message)
        tmsg = message
    if t_values is not None:
        return tmsg.format(**t_values)
    return tmsg
|
||
def npgettext(self, context, msgid1, msgid2, n):
    """Return the contextual translation of the plural form selected by *n*.

    *msgid1* and *msgid2* must both be plain strings or both be t-strings
    (``Template``); mixing them raises ``TypeError``.  Templates are
    converted by ``_template_to_format`` into format strings and
    placeholder value mappings; the catalog key is built from *context*
    and ``msgid1`` via ``self.CONTEXT``.

    If the catalog has no entry, the fallback (if any) is called with the
    caller's original arguments; otherwise ``msgid1`` is used when
    ``n == 1`` and ``msgid2`` in all other cases.
    """
    # Keep the caller's objects so a fallback receives Templates, not the
    # derived format strings.
    orig_msgid1 = msgid1
    orig_msgid2 = msgid2
    msgid1_is_template = isinstance(msgid1, Template)
    msgid2_is_template = isinstance(msgid2, Template)
    t_values1 = t_values2 = None
    if msgid1_is_template and msgid2_is_template:
        msgid1, t_values1 = _template_to_format(msgid1)
        msgid2, t_values2 = _template_to_format(msgid2)
    elif msgid1_is_template or msgid2_is_template:
        raise TypeError('msgids cannot mix strings and t-strings')
    plural = self.plural(n)
    ctxt_msg_id = self.CONTEXT % (context, msgid1)
    try:
        tmsg = self._catalog[ctxt_msg_id, plural]
    except KeyError:
        if self._fallback:
            return self._fallback.npgettext(
                context, orig_msgid1, orig_msgid2, n
            )
        # Untranslated: each msgid is rendered with its own values.
        if n == 1:
            tmsg, t_values = msgid1, t_values1
        else:
            tmsg, t_values = msgid2, t_values2
    else:
        if t_values1 is None:
            t_values = None
        elif plural:
            # A translated form may use a placeholder that occurs in only
            # one of the two msgids, so offer the values of both; on a
            # name clash the msgid matching the selected form wins.
            t_values = {**t_values1, **t_values2}
        else:
            t_values = {**t_values2, **t_values1}
    if t_values is not None:
        return tmsg.format(**t_values)
    return tmsg
|
||
|
||
|
@@ -655,3 +737,144 @@ def npgettext(context, msgid1, msgid2, n): | |
# gettext. | ||
|
||
Catalog = translation | ||
|
||
|
||
# Utilities for t-string handling in gettext translation and pygettext
# extraction.
# TBD: where these should live, and whether they should be public API or
# internal.  In particular, deriving placeholder names from interpolations
# would IMHO be beneficial to have in the stdlib, so that any implementation
# can reuse it without risking diverging behavior for the same expression.
|
||
class _NameTooComplexError(ValueError): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This however should be IMO documented, since it is “public”. I however don’t like this, I think a general (new) gettext error (or, much simpler, a ValueError) would be clearer, thoughts, Tomas? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Initially I just used this to avoid catching some other (unexpected) But I like the idea of a |
||
""" | ||
Raised when an expression is too complex to derive a format string name | ||
from it, or the resulting name would not be valid in a format string. | ||
""" | ||
|
||
|
||
class _ExtractNamesVisitor(ast.NodeVisitor): | ||
def __init__(self): | ||
self._name_parts = [] | ||
|
||
@property | ||
def name(self) -> str: | ||
name = '__'.join(self._name_parts) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used However, maybe using a single underscore would be fine here:
|
||
if not name.isidentifier(): | ||
raise _NameTooComplexError( | ||
'Only expressions which can be converted to a format string ' | ||
'placeholder may be used in a gettext call; assign the ' | ||
'expression to a variable and use that instead' | ||
) | ||
return name | ||
|
||
def generic_visit(self, node): | ||
name = node.__class__.__name__ | ||
raise _NameTooComplexError( | ||
f'Only simple expressions are supported, {name} is not allowed; ' | ||
'assign the expression to a variable and use that instead' | ||
) | ||
|
||
def visit_Attribute(self, node): | ||
self.visit(node.value) | ||
self._name_parts.append(node.attr) | ||
|
||
def visit_Name(self, node): | ||
self._name_parts.append(node.id) | ||
|
||
def visit_Subscript(self, node): | ||
self.visit(node.value) | ||
if not isinstance(node.slice, ast.Constant): | ||
raise _NameTooComplexError( | ||
'Only constant value dict keys may be used in a gettext call; ' | ||
'assign the expression to a variable and use that instead' | ||
) | ||
self.visit(node.slice) | ||
|
||
def visit_Constant(self, node): | ||
self._name_parts.append(str(node.value)) | ||
|
||
def visit_Call(self, node): | ||
self.visit(node.func) | ||
if node.args: | ||
raise _NameTooComplexError( | ||
'Function calls with arguments are not supported in gettext ' | ||
'calls; assign the result to a variable and use that instead' | ||
) | ||
|
||
|
||
def _template_node_to_format(node: ast.TemplateStr) -> str:
    """Build a format string from the AST node of a template string.

    Literal segments are copied with braces doubled; every interpolation
    is replaced by ``{name}``, where the name is derived from its
    expression by ``_ExtractNamesVisitor``.

    Raises ``_NameTooComplexError`` if an expression is not suitable for
    conversion, or if two different expressions map to the same name.
    """
    seen = {}  # placeholder name -> unparsed source of its expression
    chunks = []
    for child in node.values:
        if isinstance(child, ast.Constant):
            literal = child.value
            chunks.append(literal.replace('{', '{{').replace('}', '}}'))
        elif isinstance(child, ast.Interpolation):
            extractor = _ExtractNamesVisitor()
            extractor.visit(child.value)
            name = extractor.name
            source = ast.unparse(child.value)
            previous = seen.get(name)
            if previous and previous != source:
                raise _NameTooComplexError(
                    f'Interpolations of {previous} and {source} cannot '
                    'be mixed in the same gettext call; assign one of '
                    'them to a variable and use that instead'
                )
            seen[name] = source
            chunks.append('{' + name + '}')
    return ''.join(chunks)
|
||
|
||
def _template_to_format(template: Template) -> tuple[str, dict[str, Any]]: | ||
"""Convert a template to a format string and its value dict. | ||
This takes a :class:`~string.templatelib.Template`, and converts all the | ||
interpolations with format string placeholders derived from the original | ||
expression. | ||
This fails with a `_NameTooComplexError` in case the expression is not | ||
suitable for conversion. | ||
""" | ||
parts = [] | ||
interpolation_format_names = {} | ||
values = {} | ||
for item in template: | ||
match item: | ||
case str() as s: | ||
parts.append(s.replace('{', '{{').replace('}', '}}')) | ||
case Interpolation(value, expr, conversion, format_spec): | ||
value = convert(value, conversion) | ||
value = format(value, format_spec) | ||
name = _expr_to_format_field_name(expr) | ||
if ( | ||
existing_expr := interpolation_format_names.get(name) | ||
) and existing_expr != expr: | ||
raise _NameTooComplexError( | ||
f'Interpolations of {existing_expr} and {expr} cannot ' | ||
'be mixed in the same gettext call; assign one of ' | ||
'them to a variable and use that instead' | ||
) | ||
interpolation_format_names[name] = expr | ||
values[name] = value | ||
parts.append(f'{{{name}}}') | ||
return ''.join(parts), values | ||
|
||
|
||
@cache | ||
def _expr_to_format_field_name(expr: str) -> str: | ||
# handle simple cases w/o the overhead of dealing with an ast | ||
if expr.isidentifier(): | ||
return expr | ||
if all(x.isidentifier() for x in expr.split('.')): | ||
return '__'.join(expr.split('.')) | ||
expr_node = ast.parse(expr, mode='eval').body | ||
visitor = _ExtractNamesVisitor() | ||
visitor.visit(expr_node) | ||
return visitor.name |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should limit what is exposed in gettext apart from the core API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that's why I underscored all of them for now.
FWIW, I think exposing at least the utils to convert a template string to a format string makes sense, because tools like Babel would need to use the exact same logic, or risk inconsistencies between implementations.