gh-127833: Use productionlist nodes to implement the grammar-snippet directive #130376

Merged
merged 18 commits on Mar 20, 2025
182 changes: 112 additions & 70 deletions Doc/tools/extensions/grammar_snippet.py
@@ -13,8 +13,8 @@
from sphinx.util.nodes import make_id

if TYPE_CHECKING:
from collections.abc import Sequence
from typing import Any
from collections.abc import Iterable, Iterator, Sequence
from typing import Any, Final

from docutils.nodes import Node
from sphinx.application import Sphinx
@@ -41,98 +41,140 @@ class GrammarSnippetBase(SphinxDirective):

# The option/argument handling is left to the individual classes.

grammar_re: Final = re.compile(
r"""
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
(?=:) # ... followed by a colon
|
(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
|
(?P<single_quoted>'[^']*') # string in 'quotes'
|
(?P<double_quoted>"[^"]*") # string in "quotes"
""",
re.VERBOSE,
)

def make_grammar_snippet(
self, options: dict[str, Any], content: Sequence[str]
) -> list[nodes.paragraph]:
) -> list[addnodes.productionlist]:
"""Create a literal block from options & content."""

group_name = options['group']

# Docutils elements have a `rawsource` attribute that is supposed to be
# set to the original ReST source.
# Sphinx does the following with it:
# - if it's empty, set it to `self.astext()`
# - if it matches `self.astext()` when generating the output,
# apply syntax highlighting (which is based on the plain-text content
# and thus discards internal formatting, like references).
# To get around this, we set it to this non-empty string:
rawsource = 'You should not see this.'

literal = nodes.literal_block(
rawsource,
node_location = self.get_location()
production_nodes = []
for rawsource, production_defs in self.production_definitions(content):
production = self.make_production(
rawsource,
production_defs,
group_name=group_name,
location=node_location,
)
production_nodes.append(production)

node = addnodes.productionlist(
'',
*production_nodes,
support_smartquotes=False,
classes=['highlight'],
)
self.set_source_info(node)
return [node]

grammar_re = re.compile(
r"""
(?P<rule_name>^[a-zA-Z0-9_]+) # identifier at start of line
(?=:) # ... followed by a colon
|
(?P<rule_ref>`[^\s`]+`) # identifier in backquotes
|
(?P<single_quoted>'[^']*') # string in 'quotes'
|
(?P<double_quoted>"[^"]*") # string in "quotes"
""",
re.VERBOSE,
)

for line in content:
def production_definitions(
self, lines: Iterable[str], /
) -> Iterator[tuple[str, list[tuple[str, str]]]]:
"""Yield pairs of rawsource and production content dicts."""
production_lines: list[str] = []
production_content: list[tuple[str, str]] = []
for line in lines:
# If this line is the start of a new rule (text in column 1),
# emit the current production and start a new one.
if not line[:1].isspace():
rawsource = '\n'.join(production_lines)
production_lines.clear()
if production_content:
yield rawsource, production_content
production_content = []

# Append the current line for the raw source
production_lines.append(line)

# Parse the line into constituent parts
last_pos = 0
for match in grammar_re.finditer(line):
for match in self.grammar_re.finditer(line):
# Handle text between matches
if match.start() > last_pos:
literal += nodes.Text(line[last_pos : match.start()])
unmatched_text = line[last_pos : match.start()]
production_content.append(('text', unmatched_text))
last_pos = match.end()

# Handle matches
group_dict = {
name: content
for name, content in match.groupdict().items()
# Handle matches.
# After filtering None (non-matches), exactly one groupdict()
# entry should remain.
[(re_group_name, content)] = (
(re_group_name, content)
for re_group_name, content in match.groupdict().items()
if content is not None
}
match group_dict:
case {'rule_name': name}:
literal += self.make_link_target_for_token(
group_name, name
)
case {'rule_ref': ref_text}:
literal += token_xrefs(ref_text, group_name)
case {'single_quoted': name} | {'double_quoted': name}:
literal += snippet_string_node('', name)
case _:
raise ValueError('unhandled match')
literal += nodes.Text(line[last_pos:] + '\n')

node = nodes.paragraph(
'',
'',
literal,
)
)
production_content.append((re_group_name, content))
production_content.append(('text', line[last_pos:] + '\n'))

return [node]
# Emit the final production
if production_content:
rawsource = '\n'.join(production_lines)
yield rawsource, production_content
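
For illustration, the following standalone sketch (not part of the diff; the sample grammar line is invented) runs the same regular expression over one production line and builds the (kind, text) pairs that production_definitions yields and make_production later consumes:

import re

# Same pattern as GrammarSnippetBase.grammar_re in the diff above.
grammar_re = re.compile(
    r"""
    (?P<rule_name>^[a-zA-Z0-9_]+)  # identifier at start of line
    (?=:)                          # ... followed by a colon
    |
    (?P<rule_ref>`[^\s`]+`)        # identifier in backquotes
    |
    (?P<single_quoted>'[^']*')     # string in 'quotes'
    |
    (?P<double_quoted>"[^"]*")     # string in "quotes"
    """,
    re.VERBOSE,
)

line = "pass_stmt: 'pass' | `expression`"  # invented example line
parts = []
last_pos = 0
for match in grammar_re.finditer(line):
    # Text between matches becomes a plain 'text' part.
    if match.start() > last_pos:
        parts.append(('text', line[last_pos:match.start()]))
    last_pos = match.end()
    # Exactly one named group is non-None for each match.
    [(kind, text)] = [
        (k, v) for k, v in match.groupdict().items() if v is not None
    ]
    parts.append((kind, text))
parts.append(('text', line[last_pos:] + '\n'))
print(parts)
# [('rule_name', 'pass_stmt'), ('text', ': '), ('single_quoted', "'pass'"),
#  ('text', ' | '), ('rule_ref', '`expression`'), ('text', '\n')]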

def make_link_target_for_token(
self, group_name: str, name: str
def make_production(
self,
rawsource: str,
production_defs: list[tuple[str, str]],
*,
group_name: str,
location: str,
) -> addnodes.production:
"""Create a production node from a list of parts."""
production_node = addnodes.production(rawsource)
for re_group_name, content in production_defs:
match re_group_name:
case 'rule_name':
production_node += self.make_name_target(
name=content,
production_group=group_name,
location=location,
)
case 'rule_ref':
production_node += token_xrefs(content, group_name)
case 'single_quoted' | 'double_quoted':
production_node += snippet_string_node('', content)
case 'text':
production_node += nodes.Text(content)
case _:
raise ValueError(f'unhandled match: {re_group_name!r}')
return production_node
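
As a rough reference for how make_production assembles its result, here is a standalone sketch (assuming Sphinx and docutils are importable; the content is invented, and plain Text/literal nodes stand in for the link-target and reference nodes the directive really emits):

from docutils import nodes
from sphinx import addnodes

# Build one production node for the invented rule "pass_stmt: 'pass'".
production = addnodes.production("pass_stmt: 'pass'")
production += nodes.Text('pass_stmt')      # real code adds a link target here
production += nodes.Text(': ')
production += nodes.literal('', "'pass'")  # real code uses snippet_string_node
print(production.astext())                 # -> pass_stmt: 'pass'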

def make_name_target(
self,
*,
name: str,
production_group: str,
location: str,
) -> addnodes.literal_strong:
"""Return a literal node which is a link target for the given token."""
name_node = addnodes.literal_strong()
"""Make a link target for the given production."""

# Cargo-culted magic to make `name_node` a link target
# similar to Sphinx `production`.
# This needs to be the same as what Sphinx does
# to avoid breaking existing links.
domain = self.env.domains['std']
obj_name = f"{group_name}:{name}"
prefix = f'grammar-token-{group_name}'

name_node = addnodes.literal_strong(name, name)
prefix = f'grammar-token-{production_group}'
node_id = make_id(self.env, self.state.document, prefix, name)
name_node['ids'].append(node_id)
self.state.document.note_implicit_target(name_node, name_node)
domain.note_object('token', obj_name, node_id, location=name_node)

text_node = nodes.Text(name)
name_node += text_node
obj_name = f'{production_group}:{name}' if production_group else name
std = self.env.domains.standard_domain
std.note_object('token', obj_name, node_id, location=location)
return name_node


@@ -168,7 +210,7 @@ class GrammarSnippetDirective(GrammarSnippetBase):
optional_arguments = 1
final_argument_whitespace = True

def run(self) -> list[nodes.paragraph]:
def run(self) -> list[addnodes.productionlist]:
return self.make_grammar_snippet(self.options, self.content)


@@ -187,7 +229,7 @@ class CompatProductionList(GrammarSnippetBase):
final_argument_whitespace = True
option_spec = {}

def run(self) -> list[nodes.paragraph]:
def run(self) -> list[addnodes.productionlist]:
# The "content" of a productionlist is actually the first and only
# argument. The first line is the group; the rest is the content lines.
lines = self.arguments[0].splitlines()
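
A minimal sketch of the splitting described in the comment above (the argument value is invented):

# The legacy directive receives everything as one argument string:
# the first line names the group, the remaining lines are productions.
argument = "python-grammar\npass_stmt: 'pass'\nbreak_stmt: 'break'"
lines = argument.splitlines()
group, content = lines[0], lines[1:]
print(group)    # python-grammar
print(content)  # ["pass_stmt: 'pass'", "break_stmt: 'break'"]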