Skip to content

Commit 58a4357

Browse files
encukou, blaisep, wqferr, bswck, AA-Turner
authored
gh-127833: Docs: Add a grammar-snippet directive & replace productionlist (GH-127835)
As a first step toward aligning the grammar documentation with Python's actual grammar, this overrides the ReST `productionlist` directive to: (1) use `:` instead of the `::=` symbol; (2) add syntax highlighting for strings (using a Pygments highlighting class). All links and link targets should be preserved. (Unfortunately, this reaches into some Sphinx internals; I don't see a better way to do exactly what Sphinx does.) This also adds a new directive, `grammar-snippet`, which formats the snippet almost exactly like what's in the source, modulo syntax highlighting and keeping the backtick character to mark links to other rules. This will allow formatting the snippets as in the grammar file (file:///home/encukou/dev/cpython/Doc/build/html/reference/grammar.html). The new directive is applied to two simple rules in toplevel_components.rst. --------- Co-authored-by: Blaise Pabon <[email protected]> Co-authored-by: William Ferreira <[email protected]> Co-authored-by: bswck <[email protected]> Co-authored-by: Adam Turner <[email protected]>
1 parent e5c3b7e commit 58a4357

File tree

3 files changed

+226
-2
lines changed

3 files changed

+226
-2
lines changed

Doc/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
'c_annotations',
2828
'changes',
2929
'glossary_search',
30+
'grammar_snippet',
3031
'lexers',
3132
'misc_news',
3233
'pydoc_topics',

Doc/reference/toplevel_components.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ File input
6666

6767
All input read from non-interactive files has the same form:
6868

69-
.. productionlist:: python-grammar
69+
.. grammar-snippet::
70+
:group: python-grammar
71+
7072
file_input: (NEWLINE | `statement`)*
7173

7274
This syntax is used in the following situations:
@@ -85,7 +87,9 @@ Interactive input
8587

8688
Input in interactive mode is parsed using the following grammar:
8789

88-
.. productionlist:: python-grammar
90+
.. grammar-snippet::
91+
:group: python-grammar
92+
8993
interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE
9094

9195
Note that a (top-level) compound statement must be followed by a blank line in
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
"""Support for documenting Python's grammar."""
2+
3+
from __future__ import annotations
4+
5+
import re
6+
from typing import TYPE_CHECKING
7+
8+
from docutils import nodes
9+
from docutils.parsers.rst import directives
10+
from sphinx import addnodes
11+
from sphinx.domains.std import token_xrefs
12+
from sphinx.util.docutils import SphinxDirective
13+
from sphinx.util.nodes import make_id
14+
15+
if TYPE_CHECKING:
16+
from collections.abc import Sequence
17+
from typing import Any
18+
19+
from docutils.nodes import Node
20+
from sphinx.application import Sphinx
21+
from sphinx.util.typing import ExtensionMetadata
22+
23+
24+
class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
    """Inline node that holds a string literal found in a grammar snippet.

    Every instance gets the ``sx`` class added to its ``classes`` attribute,
    which is the Pygments highlight class for `Literal.String.Other`.
    """

    def __init__(
        self,
        rawsource: str = '',
        text: str = '',
        *children: Node,
        **attributes: Any,
    ) -> None:
        super().__init__(rawsource, text, *children, **attributes)
        # Tag the node so it picks up the `Literal.String.Other` styling.
        css_classes = self['classes']
        css_classes.append('sx')
37+
38+
39+
class GrammarSnippetBase(SphinxDirective):
    """Shared machinery for GrammarSnippetDirective & CompatProductionList.

    Option and argument handling is deliberately left to the subclasses;
    they pass the resulting options and content lines to
    `make_grammar_snippet`.
    """

    def make_grammar_snippet(
        self, options: dict[str, Any], content: Sequence[str]
    ) -> list[nodes.paragraph]:
        """Build the snippet's node tree from options & content lines.

        ``options['group']`` is the production group name used for link
        targets and cross-references.  Each line of *content* is split into
        rule names, backquoted rule references, quoted strings and plain
        text.  The result is a one-element list: a paragraph wrapping a
        literal block.
        """
        group_name = options['group']

        # `rawsource` is meant to carry the original ReST source of a
        # Docutils element.  Sphinx fills it in from `self.astext()` when it
        # is empty, and, when it equals `self.astext()` at output time,
        # re-highlights the plain text, which throws away internal
        # formatting such as references.  A non-empty dummy value that never
        # matches the text avoids both behaviours.
        rawsource = 'You should not see this.'

        literal = nodes.literal_block(
            rawsource,
            '',
            classes=['highlight'],
        )

        token_re = re.compile(
            r"""
                (?P<rule_name>^[a-zA-Z0-9_]+)     # identifier at start of line
                (?=:)                             # ... followed by a colon
            |
                (?P<rule_ref>`[^\s`]+`)           # identifier in backquotes
            |
                (?P<single_quoted>'[^']*')        # string in 'quotes'
            |
                (?P<double_quoted>"[^"]*")        # string in "quotes"
            """,
            re.VERBOSE,
        )

        for line in content:
            cursor = 0
            for found in token_re.finditer(line):
                start, end = found.span()
                if start > cursor:
                    # Plain text between the previous token and this one.
                    literal += nodes.Text(line[cursor:start])
                cursor = end

                # The alternatives are mutually exclusive, so the name of
                # the group that matched identifies the kind of token.
                kind = found.lastgroup
                if kind == 'rule_name':
                    literal += self.make_link_target_for_token(
                        group_name, found[kind]
                    )
                elif kind == 'rule_ref':
                    literal += token_xrefs(found[kind], group_name)
                elif kind in ('single_quoted', 'double_quoted'):
                    literal += snippet_string_node('', found[kind])
                else:
                    raise ValueError('unhandled match')
            # Whatever follows the last token, plus the line terminator.
            literal += nodes.Text(line[cursor:] + '\n')

        wrapper = nodes.paragraph(
            '',
            '',
            literal,
        )
        return [wrapper]

    def make_link_target_for_token(
        self, group_name: str, name: str
    ) -> addnodes.literal_strong:
        """Return a literal node which is a link target for the given token.

        The node is registered as an implicit target in the current document
        and as a ``token`` object named ``<group_name>:<name>`` in the
        ``std`` domain.
        """
        target = addnodes.literal_strong()

        # Cargo-culted from Sphinx's `production` handling.  The ID prefix
        # and object name must stay the same as what Sphinx generates,
        # otherwise existing links would break.
        std_domain = self.env.domains['std']
        document = self.state.document
        node_id = make_id(
            self.env, document, f'grammar-token-{group_name}', name
        )
        target['ids'].append(node_id)
        document.note_implicit_target(target, target)
        std_domain.note_object(
            'token', f"{group_name}:{name}", node_id, location=target
        )

        # The visible text of the target is the rule name itself.
        target.append(nodes.Text(name))
        return target
137+
138+
139+
class GrammarSnippetDirective(GrammarSnippetBase):
    """Transform a grammar-snippet directive to a Sphinx literal_block

    That is, turn something like:

        .. grammar-snippet:: file
           :group: python-grammar

           file: (NEWLINE | statement)*

    into something similar to Sphinx productionlist, but better suited
    for our needs:
    - Instead of `::=`, use a colon, as in `Grammar/python.gram`
    - Show the listing almost as is, with no auto-alignment.
      The only special character is the backtick, which marks tokens.

    Unlike Sphinx's productionlist, this directive supports options.
    The "group" must be given as a named option.
    The content must be preceded by a blank line (like with most ReST
    directives).
    """

    has_content = True
    option_spec = {
        'group': directives.unchanged_required,
    }

    # We currently ignore arguments.
    required_arguments = 0
    optional_arguments = 1
    final_argument_whitespace = True

    def run(self) -> list[nodes.paragraph]:
        """Render the directive content as a grammar snippet.

        Raises a directive error (reported by docutils at the directive's
        location) when the mandatory ``:group:`` option is missing.
        """
        # `unchanged_required` only validates the value of an option that is
        # present; it does not make the option itself mandatory.  Check here
        # so a missing group is reported as a markup error instead of a bare
        # KeyError from `make_grammar_snippet`.
        if 'group' not in self.options:
            raise self.error(
                f'The "{self.name}" directive requires the ":group:" option.'
            )
        return self.make_grammar_snippet(self.options, self.content)
173+
174+
175+
class CompatProductionList(GrammarSnippetBase):
    """Create grammar snippets from reST productionlist syntax

    This is intended to be a transitional directive, used while we switch
    from productionlist to grammar-snippet.
    It makes existing docs that use the ReST syntax look like grammar-snippet,
    as much as possible.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {}

    def run(self) -> list[nodes.paragraph]:
        """Convert the productionlist argument into an aligned snippet.

        The first line of the argument is the group name; every following
        line is a production (``name: text``) or a continuation
        (``: text``).  The text of all lines is aligned on the column just
        past the right-most colon.
        """
        # The "content" of a productionlist is actually the first and only
        # argument. The first line is the group; the rest is the content lines.
        lines = self.arguments[0].splitlines()
        group = lines[0].strip()
        options = {'group': group}
        rule_lines = lines[1:]

        # Align on the colon.  Lines without a colon are left out of the
        # computation, and `default=0` covers a list with no production
        # lines at all; both cases would otherwise raise ValueError.
        align_column = (
            max(
                (line.index(':') for line in rule_lines if ':' in line),
                default=0,
            )
            + 1
        )

        content = []
        for line in rule_lines:
            rule_name, colon, text = line.partition(':')
            if not colon:
                # No colon: treat the whole line as continuation text so it
                # is not turned into a bogus `name:` rule.
                rule_name, text = '', ' ' + line.strip()
            rule_name = rule_name.strip()
            name_part = rule_name + ':' if rule_name else ''
            content.append(f'{name_part:<{align_column}}{text}')
        return self.make_grammar_snippet(options, content)
208+
209+
210+
def setup(app: Sphinx) -> ExtensionMetadata:
    """Register the grammar directives and return the extension metadata.

    Adds the ``grammar-snippet`` directive and overrides the ``std``
    domain's ``productionlist`` directive with `CompatProductionList`.
    """
    app.add_directive('grammar-snippet', GrammarSnippetDirective)
    # `override=True` replaces the directive already registered under the
    # same name in the `std` domain.
    app.add_directive_to_domain(
        'std',
        'productionlist',
        CompatProductionList,
        override=True,
    )

    metadata: ExtensionMetadata = dict(
        version='1.0',
        parallel_read_safe=True,
        parallel_write_safe=True,
    )
    return metadata

0 commit comments

Comments
 (0)