Skip to content

Commit efeae28

Browse files
authored
Infra: Improve email and link processing and rendering in headers (#2467)
* Infra: Include mailing list link type in text for clarity, & refactor
* Infra: Support mailing list info pages as well as archives
* Infra: Pretty-format Discourse categories/threads in headers as well
* Infra: Show friendly-name of target in Post-History link hover text
* Infra: Automatically link list addresses to list pages in Discussions-To
* Infra: Use simpler check for title in Discourse URL header processing
* Infra: Automatically trim trailing commas & whitespace in header values
1 parent 4a5751f commit efeae28

File tree

2 files changed

+119
-21
lines changed

2 files changed

+119
-21
lines changed

pep-0009.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ Type: Process
88
Content-Type: text/x-rst
99
Created: 14-Aug-2001
1010
Post-History:
11-
Resolution: https://mail.python.org/mailman/private/peps/2016-January/001165.html
11+
Resolution: https://mail.python.org/archives/list/python-dev@python.org/thread/2YMHVPRDWGQLA5A2FKXE2JMLM2HQEEGW/
12+
1213

1314
::
1415

pep_sphinx_extensions/pep_processor/transforms/pep_headers.py

Lines changed: 117 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,23 @@ def apply(self) -> None:
7474
if not isinstance(node, nodes.reference):
7575
continue
7676
node.replace_self(_mask_email(node))
77-
elif name in {"discussions-to", "resolution"}:
78-
# only handle threads, email addresses in Discussions-To aren't
79-
# masked.
77+
elif name in {"discussions-to", "resolution", "post-history"}:
78+
# Prettify mailing list and Discourse links
8079
for node in para:
81-
if not isinstance(node, nodes.reference):
80+
if (not isinstance(node, nodes.reference)
81+
or not node["refuri"]):
82+
continue
83+
# Have known mailto links link to their main list pages
84+
if node["refuri"].lower().startswith("mailto:"):
85+
node["refuri"] = _generate_list_url(node["refuri"])
86+
parts = node["refuri"].lower().split("/")
87+
if len(parts) <= 2 or parts[2] not in LINK_PRETTIFIERS:
8288
continue
83-
if node["refuri"].startswith("https://mail.python.org"):
84-
node[0] = _pretty_thread(node[0])
89+
pretty_title = _make_link_pretty(str(node["refuri"]))
90+
if name == "post-history":
91+
node["reftitle"] = pretty_title
92+
else:
93+
node[0] = nodes.Text(pretty_title)
8594
elif name in {"replaces", "superseded-by", "requires"}:
8695
# replace PEP numbers with normalised list of links to PEPs
8796
new_body = []
@@ -93,25 +102,113 @@ def apply(self) -> None:
93102
# Mark unneeded fields
94103
fields_to_remove.append(field)
95104

105+
# Remove any trailing commas and whitespace in the headers
106+
if para and isinstance(para[-1], nodes.Text):
107+
last_node = para[-1]
108+
if last_node.astext().strip() == ",":
109+
last_node.parent.remove(last_node)
110+
else:
111+
para[-1] = last_node.rstrip().rstrip(",")
112+
96113
# Remove unneeded fields
97114
for field in fields_to_remove:
98115
field.parent.remove(field)
99116

100117

101-
def _pretty_thread(text: nodes.Text) -> nodes.Text:
102-
parts = text.title().replace("Sig", "SIG").split("/")
118+
def _generate_list_url(mailto: str) -> str:
119+
list_name_domain = mailto.lower().removeprefix("mailto:").strip()
120+
list_name = list_name_domain.split("@")[0]
103121

104-
# mailman structure is
105-
# https://mail.python.org/archives/list/<list name>/thread/<id>
106-
try:
107-
return nodes.Text(parts[parts.index("Archives") + 2].removesuffix("@Python.Org"))
108-
except ValueError:
109-
pass
122+
if list_name_domain.endswith("@googlegroups.com"):
123+
return f"https://groups.google.com/g/{list_name}"
124+
125+
if not list_name_domain.endswith("@python.org"):
126+
return mailto
127+
128+
# Active lists not yet on Mailman3; this URL will redirect if/when they are
129+
if list_name in {"csv", "db-sig", "doc-sig", "python-list", "web-sig"}:
130+
return f"https://mail.python.org/mailman/listinfo/{list_name}"
131+
# Retired lists that are closed for posting, so only the archive matters
132+
if list_name in {"import-sig", "python-3000"}:
133+
return f"https://mail.python.org/pipermail/{list_name}/"
134+
# The remaining lists (and any new ones) are all on Mailman3/Hyperkitty
135+
return f"https://mail.python.org/archives/list/{list_name}@python.org/"
136+
137+
138+
def _process_list_url(parts: list[str]) -> tuple[str, str]:
139+
item_type = "list"
140+
141+
# HyperKitty (Mailman3) archive structure is
142+
# https://mail.python.org/archives/list/<list_name>/thread/<id>
143+
if "archives" in parts:
144+
list_name = (
145+
parts[parts.index("archives") + 2].removesuffix("@python.org"))
146+
if len(parts) > 6 and parts[6] in {"message", "thread"}:
147+
item_type = parts[6]
148+
149+
# Mailman3 list info structure is
150+
# https://mail.python.org/mailman3/lists/<list_name>.python.org/
151+
elif "mailman3" in parts:
152+
list_name = (
153+
parts[parts.index("mailman3") + 2].removesuffix(".python.org"))
154+
155+
# Pipermail (Mailman) archive structure is
156+
# https://mail.python.org/pipermail/<list_name>/<month>-<year>/<id>
157+
elif "pipermail" in parts:
158+
list_name = parts[parts.index("pipermail") + 1]
159+
item_type = "message" if len(parts) > 6 else "list"
160+
161+
# Mailman listinfo structure is
162+
# https://mail.python.org/mailman/listinfo/<list_name>
163+
elif "listinfo" in parts:
164+
list_name = parts[parts.index("listinfo") + 1]
165+
166+
# Not a link to a mailing list, message or thread
167+
else:
168+
raise ValueError(
169+
f"{'/'.join(parts)} not a link to a list, message or thread")
110170

111-
# pipermail structure is
112-
# https://mail.python.org/pipermail/<list name>/<month-year>/<id>
171+
return list_name, item_type
172+
173+
174+
def _process_discourse_url(parts: list[str]) -> tuple[str, str]:
175+
item_name = "discourse"
176+
177+
if len(parts) < 5 or ("t" not in parts and "c" not in parts):
178+
raise ValueError(
179+
f"{'/'.join(parts)} not a link to a Discourse thread or category")
180+
181+
first_subpart = parts[4]
182+
has_title = not first_subpart.isnumeric()
183+
184+
if "t" in parts:
185+
item_type = "post" if len(parts) > (5 + has_title) else "thread"
186+
elif "c" in parts:
187+
item_type = "category"
188+
if has_title:
189+
item_name = f"{first_subpart.replace('-', ' ')} {item_name}"
190+
191+
return item_name, item_type
192+
193+
194+
# Host name -> function that turns the "/"-split URL into an
# (item_name, item_type) pair; only links to these hosts are prettified
LINK_PRETTIFIERS = {
    "mail.python.org": _process_list_url,
    "discuss.python.org": _process_discourse_url,
}
199+
200+
201+
def _process_pretty_url(url: str) -> tuple[str, str]:
    """Split a supported URL into a title-cased item name and an item type.

    The URL is lower-cased, stripped of surrounding whitespace and slashes,
    and handed to the prettifier registered for its host in
    ``LINK_PRETTIFIERS``.

    Args:
        url: The link target to describe.

    Returns:
        A ``(item_name, item_type)`` pair, with *item_name* title-cased and
        ``Sig`` rewritten as ``SIG``.

    Raises:
        ValueError: If the URL has no host segment, its host has no
            registered prettifier, or the prettifier rejects the URL.
    """
    parts = url.lower().strip().strip("/").split("/")
    # Guard only the lookup, so that an error raised inside a prettifier is
    # never misreported as an unrecognised domain
    try:
        prettifier = LINK_PRETTIFIERS[parts[2]]
    except (KeyError, IndexError) as error:
        raise ValueError(
            f"{url} not a link to a recognized domain to prettify") from error
    item_name, item_type = prettifier(parts)
    item_name = item_name.title().replace("Sig", "SIG")
    return item_name, item_type
210+
211+
212+
def _make_link_pretty(url: str) -> str:
    """Return the human-readable label for *url*: its name, then its type.

    Any error raised by ``_process_pretty_url`` is propagated unchanged.
    """
    name, kind = _process_pretty_url(url)
    return "{} {}".format(name, kind)

0 commit comments

Comments
 (0)