Skip to content

gh-104400: pygettext: use an AST parser instead of a tokenizer #104402

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 40 commits into main from better-pygettext
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ce99920
Move test_i18n into a separate folder
tomasr8 May 10, 2023
4234c0b
Switch to AST-based message extraction
tomasr8 May 10, 2023
d431951
Add news entry
tomasr8 May 11, 2023
5721857
Merge branch 'main' into better-pygettext
tomasr8 Jun 12, 2023
42277e8
Fix comment
tomasr8 Jun 13, 2023
03f698f
Merge branch 'main' into better-pygettext
tomasr8 Jun 13, 2023
705b608
Merge branch 'main' into better-pygettext
tomasr8 Jun 15, 2023
a16274f
Merge branch 'main' into better-pygettext
tomasr8 Jun 16, 2023
f291862
Merge branch 'main' into better-pygettext
tomasr8 Jul 19, 2023
ca4cd02
Merge remote-tracking branch 'upstream/main' into better-pygettext
tomasr8 Nov 29, 2024
22c44b4
Fix conflicts
tomasr8 Nov 29, 2024
5625422
Remove unrelated changes
tomasr8 Nov 29, 2024
a80d92e
Use match-case
tomasr8 Nov 29, 2024
7fe3df5
Reorder methods
tomasr8 Nov 29, 2024
a6b1d54
Test f-strings
tomasr8 Nov 29, 2024
46eba7a
Improve error messages
tomasr8 Nov 29, 2024
424ad6a
Update news entry
tomasr8 Nov 30, 2024
7d58283
Fix error messages
tomasr8 Nov 30, 2024
6e7ca58
Add tests for error messages
tomasr8 Nov 30, 2024
7cfa879
Normalize line endings on Windows
tomasr8 Nov 30, 2024
53e0664
Merge branch 'main' into better-pygettext
AA-Turner Feb 2, 2025
c85bc8e
Merge branch 'main' into better-pygettext
AA-Turner Feb 2, 2025
5170691
Merge remote-tracking branch 'upstream/main' into better-pygettext
tomasr8 Feb 5, 2025
de52a20
Simplify docstring extraction
tomasr8 Feb 5, 2025
7caef48
:seal:
tomasr8 Feb 5, 2025
2409f72
Readability improvements
tomasr8 Feb 5, 2025
3a84af8
Fix tests
tomasr8 Feb 5, 2025
7f9c244
Remove unused method
tomasr8 Feb 5, 2025
3efa0ba
PEP8 fixes
tomasr8 Feb 5, 2025
3f22a99
Use f-strings
tomasr8 Feb 5, 2025
9856868
Simplifications
tomasr8 Feb 5, 2025
84e2d24
Add a comment
tomasr8 Feb 6, 2025
006e4a8
Remove walrus
tomasr8 Feb 6, 2025
d684780
Remove redundant function
tomasr8 Feb 6, 2025
084405f
Add visit_file to GettextVisitor
tomasr8 Feb 6, 2025
1ad8d76
Simplify reading files
tomasr8 Feb 6, 2025
29ec497
Reject calls with var-positional arguments
tomasr8 Feb 6, 2025
621cf01
Use more specific visit functions
tomasr8 Feb 6, 2025
a3e866d
Use aliases for some visit methods
tomasr8 Feb 6, 2025
4d51b08
Remove walrus
tomasr8 Feb 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions Lib/test/test_tools/i18n_data/docstrings.pot
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,40 @@ msgstr ""
"Generated-By: pygettext.py 1.5\n"


#: docstrings.py:7
#: docstrings.py:1
#, docstring
msgid "Module docstring"
msgstr ""

#: docstrings.py:9
#, docstring
msgid ""
msgstr ""

#: docstrings.py:18
#: docstrings.py:15
#, docstring
msgid "docstring"
msgstr ""

#: docstrings.py:20
#, docstring
msgid ""
"multiline\n"
" docstring\n"
" "
"docstring"
msgstr ""

#: docstrings.py:25
#: docstrings.py:27
#, docstring
msgid "docstring1"
msgstr ""

#: docstrings.py:30
#: docstrings.py:38
#, docstring
msgid "nested docstring"
msgstr ""

#: docstrings.py:43
#, docstring
msgid "Hello, {}!"
msgid "nested class docstring"
msgstr ""

12 changes: 7 additions & 5 deletions Lib/test/test_tools/i18n_data/docstrings.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Module docstring"""

# Test docstring extraction
from gettext import gettext as _

Expand All @@ -10,10 +12,10 @@ def test(x):
# Leading empty line
def test2(x):

"""docstring""" # XXX This should be extracted but isn't.
"""docstring"""


# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
# Multiline docstrings are cleaned with `inspect.cleandoc`.
def test3(x):
"""multiline
docstring
Expand All @@ -27,15 +29,15 @@ def test4(x):


def test5(x):
"""Hello, {}!""".format("world!") # XXX This should not be extracted.
"""Hello, {}!""".format("world!") # This should not be extracted.


# Nested docstrings
def test6(x):
def inner(y):
"""nested docstring""" # XXX This should be extracted but isn't.
"""nested docstring"""


class Outer:
class Inner:
"nested class docstring" # XXX This should be extracted but isn't.
"nested class docstring"
22 changes: 16 additions & 6 deletions Lib/test/test_tools/i18n_data/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,22 @@ msgstr ""
msgid ""
msgstr ""

#: messages.py:19 messages.py:20
#: messages.py:19 messages.py:20 messages.py:21
msgid "parentheses"
msgstr ""

#: messages.py:23
#: messages.py:24
msgid "Hello, world!"
msgstr ""

#: messages.py:26
#: messages.py:27
msgid ""
"Hello,\n"
" multiline!\n"
msgstr ""

#: messages.py:46 messages.py:89 messages.py:90 messages.py:93 messages.py:94
#: messages.py:99
#: messages.py:99 messages.py:100 messages.py:101
msgid "foo"
msgid_plural "foos"
msgstr[0] ""
Expand Down Expand Up @@ -68,22 +68,32 @@ msgstr ""
msgid "set"
msgstr ""

#: messages.py:63
#: messages.py:62 messages.py:63
msgid "nested string"
msgstr ""

#: messages.py:68
msgid "baz"
msgstr ""

#: messages.py:71 messages.py:75
msgid "default value"
msgstr ""

#: messages.py:91 messages.py:92 messages.py:95 messages.py:96
msgctxt "context"
msgid "foo"
msgid_plural "foos"
msgstr[0] ""
msgstr[1] ""

#: messages.py:100
#: messages.py:102
msgid "domain foo"
msgstr ""

#: messages.py:118 messages.py:119
msgid "world"
msgid_plural "worlds"
msgstr[0] ""
msgstr[1] ""

21 changes: 15 additions & 6 deletions Lib/test/test_tools/i18n_data/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# Extra parentheses
(_("parentheses"))
((_("parentheses")))
_(("parentheses"))

# Multiline strings
_("Hello, "
Expand All @@ -32,23 +33,22 @@
_(None)
_(1)
_(False)
_(("invalid"))
_(["invalid"])
_({"invalid"})
_("string"[3])
_("string"[:3])
_({"string": "foo"})

# pygettext does not allow keyword arguments, but both xgettext and pybabel do
_(x="kwargs work!")
_(x="kwargs are not allowed!")

# Unusual, but valid arguments
_("foo", "bar")
_("something", x="something else")

# .format()
_("Hello, {}!").format("world") # valid
_("Hello, {}!".format("world")) # invalid, but xgettext and pybabel extract the first string
_("Hello, {}!".format("world")) # invalid, but xgettext extracts the first string

# Nested structures
_("1"), _("2")
Expand All @@ -59,7 +59,7 @@

# Nested functions and classes
def test():
_("nested string") # XXX This should be extracted but isn't.
_("nested string")
[_("nested string")]


Expand All @@ -68,11 +68,11 @@ def bar(self):
return _("baz")


def bar(x=_('default value')): # XXX This should be extracted but isn't.
def bar(x=_('default value')):
pass


def baz(x=[_('default value')]): # XXX This should be extracted but isn't.
def baz(x=[_('default value')]):
pass


Expand All @@ -97,6 +97,8 @@ def _(x="don't extract me"):

# Complex arguments
ngettext("foo", "foos", 42 + (10 - 20))
ngettext("foo", "foos", *args)
ngettext("foo", "foos", **kwargs)
dgettext(["some", {"complex"}, ("argument",)], "domain foo")

# Invalid calls which are not extracted
Expand All @@ -108,3 +110,10 @@ def _(x="don't extract me"):
dngettext('domain', 'foo')
dpgettext('domain', 'context')
dnpgettext('domain', 'context', 'foo')
dgettext(*args, 'foo')
dpgettext(*args, 'context', 'foo')
dnpgettext(*args, 'context', 'foo', 'foos')

# f-strings
f"Hello, {_('world')}!"
f"Hello, {ngettext('world', 'worlds', 3)}!"
28 changes: 26 additions & 2 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def assert_POT_equal(self, expected, actual):
self.maxDiff = None
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

def extract_from_str(self, module_content, *, args=(), strict=True):
def extract_from_str(self, module_content, *, args=(), strict=True, with_stderr=False):
"""Return all msgids extracted from module_content."""
filename = 'test.py'
with temp_cwd(None):
Expand All @@ -98,12 +98,18 @@ def extract_from_str(self, module_content, *, args=(), strict=True):
self.assertEqual(res.err, b'')
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
return self.get_msgids(data)
msgids = self.get_msgids(data)
if not with_stderr:
return msgids
return msgids, res.err

def extract_docstrings_from_str(self, module_content):
"""Return all docstrings extracted from module_content."""
# Thin wrapper around extract_from_str: adds the '--docstrings' command-line
# option and passes strict=False.
# NOTE(review): strict=False presumably skips the empty-stderr assertion in
# extract_from_str -- confirm against that helper's full body.
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)

def get_stderr(self, module_content):
# Run extraction on module_content and return only what was captured on
# stderr. With with_stderr=True, extract_from_str returns a
# (msgids, stderr) tuple, so index 1 selects the stderr value; the msgids
# are discarded.
# NOTE(review): strict=False presumably keeps extract_from_str from failing
# on non-empty stderr -- confirm against that helper's full body.
return self.extract_from_str(module_content, strict=False, with_stderr=True)[1]

def test_header(self):
"""Make sure the required fields are in the header, according to:
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
Expand Down Expand Up @@ -407,6 +413,24 @@ def test_files_list(self):
self.assertIn(f'msgid "{text2}"', data)
self.assertNotIn(text3, data)

def test_error_messages(self):
"""Test that pygettext outputs error messages to stderr."""
# The input has three calls, one per line, and the assertion below expects
# exactly one diagnostic for each, in source order:
#   line 1: a non-constant expression as the message argument
#   line 2: too few positional arguments for ngettext
#   line 3: a var-positional (*args) argument
stderr = self.get_stderr(dedent('''\
_(1+2)
ngettext('foo')
dgettext(*args, 'foo')
'''))

# Normalize line endings on Windows
# (stderr is decoded as UTF-8 before the '\r' characters are dropped).
stderr = stderr.decode('utf-8').replace('\r', '')

# Each expected line has the form "*** <filename>:<lineno>: <message>".
# The comparison is exact, so any extra or reordered output fails the test.
self.assertEqual(
stderr,
"*** test.py:1: Expected a string constant for argument 1, got 1 + 2\n"
"*** test.py:2: Expected at least 2 positional argument(s) in gettext call, got 1\n"
"*** test.py:3: Variable positional arguments are not allowed in gettext calls\n"
)


def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix several bugs in message extraction in :program:`pygettext` by switching from a tokenizer to an AST parser.
Loading
Loading