|
| 1 | +"""Support for documenting Python's grammar.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import re |
| 6 | +from typing import TYPE_CHECKING |
| 7 | + |
| 8 | +from docutils import nodes |
| 9 | +from docutils.parsers.rst import directives |
| 10 | +from sphinx import addnodes |
| 11 | +from sphinx.domains.std import token_xrefs |
| 12 | +from sphinx.util.docutils import SphinxDirective |
| 13 | +from sphinx.util.nodes import make_id |
| 14 | + |
| 15 | +if TYPE_CHECKING: |
| 16 | + from collections.abc import Sequence |
| 17 | + from typing import Any |
| 18 | + |
| 19 | + from docutils.nodes import Node |
| 20 | + from sphinx.application import Sphinx |
| 21 | + from sphinx.util.typing import ExtensionMetadata |
| 22 | + |
| 23 | + |
class snippet_string_node(nodes.inline):  # noqa: N801 (snake_case is fine)
    """Inline node holding a quoted string literal from a grammar snippet.

    Every instance gets the ``sx`` class appended to its ``classes``
    attribute on construction.
    """

    def __init__(
        self,
        rawsource: str = '',
        text: str = '',
        *children: Node,
        **attributes: Any,
    ) -> None:
        super().__init__(rawsource, text, *children, **attributes)
        # `sx` is the Pygments highlight class for `Literal.String.Other`.
        css_classes = self['classes']
        css_classes.append('sx')
| 37 | + |
| 38 | + |
class GrammarSnippetBase(SphinxDirective):
    """Shared machinery for GrammarSnippetDirective & CompatProductionList."""

    # Option and argument handling is the job of the individual subclasses.

    def make_grammar_snippet(
        self, options: dict[str, Any], content: Sequence[str]
    ) -> list[nodes.paragraph]:
        """Build the node tree for one grammar snippet.

        ``options['group']`` names the production group; ``content`` holds
        the grammar lines. Each line is scanned for rule names (turned into
        link targets), backquoted rule references (turned into token
        cross-references) and quoted strings (wrapped in
        ``snippet_string_node``); everything else is copied as plain text.

        Returns a one-element list: a paragraph wrapping the literal block.
        """
        group_name = options['group']

        # Docutils elements carry a `rawsource` attribute that is meant to
        # hold the original ReST source. Sphinx treats it as follows:
        # - if it's empty, it is set to `self.astext()`
        # - if it equals `self.astext()` when output is generated, syntax
        #   highlighting is applied (which works from the plain-text content
        #   and so throws away internal formatting such as references).
        # A non-empty placeholder string sidesteps both of these:
        rawsource = 'You should not see this.'

        literal = nodes.literal_block(rawsource, '', classes=['highlight'])

        grammar_re = re.compile(
            r"""
                (?P<rule_name>^[a-zA-Z0-9_]+)     # identifier at start of line
                (?=:)                             # ... followed by a colon
            |
                (?P<rule_ref>`[^\s`]+`)           # identifier in backquotes
            |
                (?P<single_quoted>'[^']*')        # string in 'quotes'
            |
                (?P<double_quoted>"[^"]*")        # string in "quotes"
            """,
            re.VERBOSE,
        )

        for line in content:
            cursor = 0
            for found in grammar_re.finditer(line):
                # Plain text sitting between the previous match and this one
                start = found.start()
                if start > cursor:
                    literal += nodes.Text(line[cursor:start])
                cursor = found.end()

                # Exactly one named alternative matched; dispatch on its name
                kind = found.lastgroup
                if kind == 'rule_name':
                    literal += self.make_link_target_for_token(
                        group_name, found[kind]
                    )
                elif kind == 'rule_ref':
                    literal += token_xrefs(found[kind], group_name)
                elif kind in ('single_quoted', 'double_quoted'):
                    literal += snippet_string_node('', found[kind])
                else:
                    raise ValueError('unhandled match')
            # Remainder of the line, plus the line break
            literal += nodes.Text(line[cursor:] + '\n')

        return [nodes.paragraph('', '', literal)]

    def make_link_target_for_token(
        self, group_name: str, name: str
    ) -> addnodes.literal_strong:
        """Return a literal node that acts as the link target for a token.

        The node contains ``name`` as text and is registered with the ``std``
        domain as a ``token`` object named ``{group_name}:{name}``.
        """
        target = addnodes.literal_strong()

        # Cargo-culted magic that turns `target` into a link target,
        # modelled on Sphinx's `production`.
        # It has to match what Sphinx does, otherwise existing links break.
        std_domain = self.env.domains['std']
        node_id = make_id(
            self.env, self.state.document, f'grammar-token-{group_name}', name
        )
        target['ids'].append(node_id)
        self.state.document.note_implicit_target(target, target)
        std_domain.note_object(
            'token', f"{group_name}:{name}", node_id, location=target
        )

        target += nodes.Text(name)
        return target
| 137 | + |
| 138 | + |
class GrammarSnippetDirective(GrammarSnippetBase):
    """Transform a grammar-snippet directive to a Sphinx literal_block

    That is, turn something like:

        .. grammar-snippet:: file
           :group: python-grammar

           file: (NEWLINE | statement)*

    into something similar to Sphinx productionlist, but better suited
    for our needs:
    - Instead of `::=`, use a colon, as in `Grammar/python.gram`
    - Show the listing almost as is, with no auto-alignment.
      The only special character is the backtick, which marks tokens.

    Unlike Sphinx's productionlist, this directive supports options.
    The "group" must be given as a named option.
    The content must be preceded by a blank line (like with most ReST
    directives).
    """

    has_content = True
    option_spec = {
        'group': directives.unchanged_required,
    }

    # We currently ignore arguments.
    required_arguments = 0
    optional_arguments = 1
    final_argument_whitespace = True

    def run(self) -> list[nodes.paragraph]:
        """Build the snippet nodes from this directive's options and content.

        Raises a directive error (via ``self.error``) when the ``:group:``
        option is missing, because ``make_grammar_snippet`` reads
        ``options['group']`` unconditionally.
        """
        # `unchanged_required` only validates the value when the option is
        # present; it does not make the option itself mandatory.
        if 'group' not in self.options:
            raise self.error(
                f'Error in "{self.name}" directive: '
                'the ":group:" option is required.'
            )
        return self.make_grammar_snippet(self.options, self.content)
| 173 | + |
| 174 | + |
class CompatProductionList(GrammarSnippetBase):
    """Create grammar snippets from reST productionlist syntax

    This is intended to be a transitional directive, used while we switch
    from productionlist to grammar-snippet.
    It makes existing docs that use the ReST syntax look like grammar-snippet,
    as much as possible.
    """

    has_content = False
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {}

    def run(self) -> list[nodes.paragraph]:
        """Convert the productionlist argument into grammar-snippet nodes.

        The first line of the argument is the group name; each following
        line is a production and must contain a colon. Rule names are
        left-aligned and padded so the text after the colon starts in the
        same column on every line.

        Raises a directive error (via ``self.error``) when a production line
        has no colon. A list with a group name but no productions produces
        an empty snippet.
        """
        # The "content" of a productionlist is actually the first and only
        # argument. The first line is the group; the rest is the content lines.
        lines = self.arguments[0].splitlines()
        group = lines[0].strip()
        options = {'group': group}
        rule_lines = lines[1:]

        # Every production line needs a colon; we align on it.
        colon_columns = []
        for line in rule_lines:
            column = line.find(':')
            if column < 0:
                raise self.error(
                    f'Error in "{self.name}" directive: '
                    f'expected a colon in production line {line!r}.'
                )
            colon_columns.append(column)
        # `default` keeps a productionlist without any rules from crashing.
        align_column = max(colon_columns, default=0) + 1

        content = []
        for line in rule_lines:
            rule_name, _colon, text = line.partition(':')
            rule_name = rule_name.strip()
            # Continuation lines have nothing before the colon; they get
            # padding only, so their text lines up with the rule above.
            name_part = f'{rule_name}:' if rule_name else ''
            content.append(f'{name_part:<{align_column}}{text}')
        return self.make_grammar_snippet(options, content)
| 208 | + |
| 209 | + |
def setup(app: Sphinx) -> ExtensionMetadata:
    """Register the grammar directives with Sphinx; return extension metadata."""
    app.add_directive('grammar-snippet', GrammarSnippetDirective)
    # Register the compat directive as `productionlist` in the `std` domain,
    # with `override=True`.
    app.add_directive_to_domain(
        'std', 'productionlist', CompatProductionList, override=True
    )
    metadata = {
        'version': '1.0',
        'parallel_read_safe': True,
        'parallel_write_safe': True,
    }
    return metadata
0 commit comments