Skip to content
This repository was archived by the owner on Nov 4, 2024. It is now read-only.

Commit 6b57eff

Browse files
authored
Compile docstring converter regexes (microsoft#707)
* Compile docstring converter regexes
* Don't bother changing the comments
1 parent c0a0f0f commit 6b57eff

File tree

1 file changed

+49
-21
lines changed

1 file changed

+49
-21
lines changed

src/LanguageServer/Impl/Documentation/DocstringConverter.cs

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323

2424
namespace Microsoft.Python.LanguageServer.Documentation {
2525
internal class DocstringConverter {
26-
private static readonly string[] PotentialHeaders = new[] { "=", "-", "~", "+" };
27-
2826
/// <summary>
2927
/// Converts a docstring to a plaintext, human readable form. This will
3028
/// first strip any common leading indentation (like inspect.cleandoc),
@@ -55,7 +53,7 @@ public static string ToPlaintext(string docstring) {
5553
/// <returns>The converted docstring, with Environment.NewLine line endings.</returns>
5654
public static string ToMarkdown(string docstring) => new DocstringConverter(docstring).Convert();
5755

58-
private readonly StringBuilder _builder = new StringBuilder();
56+
private readonly StringBuilder _builder;
5957
private bool _skipAppendEmptyLine = true;
6058
private bool _insideInlineCode = false;
6159
private bool _appendDirectiveBlock = false;
@@ -79,6 +77,7 @@ private int NextBlockIndent
7977
private string CurrentLineWithinBlock => CurrentLine.Substring(_blockIndent);
8078

8179
private DocstringConverter(string input) {
80+
_builder = new StringBuilder(input.Length);
8281
_state = ParseText;
8382
_lines = SplitDocstring(input);
8483
}
@@ -153,12 +152,28 @@ private void ParseText() {
153152
EatLine();
154153
}
155154

155+
private static readonly Regex DirectivesExtraNewlineRegex = new Regex(@"^\s*:(param|arg|type|return|rtype|raise|except|var|ivar|cvar|copyright|license)", RegexOptions.Singleline | RegexOptions.Compiled);
156+
157+
private static readonly (Regex, string)[] PotentialHeaders = new[] {
158+
(new Regex(@"^\s*=+(\s+=+)+$", RegexOptions.Singleline | RegexOptions.Compiled), "="),
159+
(new Regex(@"^\s*-+(\s+-+)+$", RegexOptions.Singleline | RegexOptions.Compiled), "-"),
160+
(new Regex(@"^\s*~+(\s+~+)+$", RegexOptions.Singleline | RegexOptions.Compiled), "~"),
161+
(new Regex(@"^\s*\++(\s+\++)+$", RegexOptions.Singleline | RegexOptions.Compiled), "+"),
162+
};
163+
164+
private static readonly Regex WhitespaceRegex = new Regex(@"\s", RegexOptions.Singleline | RegexOptions.Compiled);
165+
166+
private static readonly Regex TildaHeaderRegex = new Regex(@"^\s*~~~+$", RegexOptions.Singleline | RegexOptions.Compiled);
167+
private static readonly Regex PlusHeaderRegex = new Regex(@"^\s*\+\+\++$", RegexOptions.Singleline | RegexOptions.Compiled);
168+
private static readonly Regex LeadingAsteriskRegex = new Regex(@"^(\s+\* )(.*)$", RegexOptions.Singleline | RegexOptions.Compiled);
169+
private static readonly Regex UnescapedMarkdownCharsRegex = new Regex(@"(?<!\\)([_*~])", RegexOptions.Singleline | RegexOptions.Compiled);
170+
156171
private void AppendTextLine(string line) {
157172
line = PreprocessTextLine(line);
158173

159174
// Hack: attempt to put directive lines into their own paragraphs.
160175
// This should be removed once proper list-like parsing is written.
161-
if (!_insideInlineCode && Regex.IsMatch(line, @"^\s*:(param|arg|type|return|rtype|raise|except|var|ivar|cvar|copyright|license)")) {
176+
if (!_insideInlineCode && DirectivesExtraNewlineRegex.IsMatch(line)) {
162177
AppendLine();
163178
}
164179

@@ -181,17 +196,16 @@ private void AppendTextLine(string line) {
181196
// Only one part, and not inside code, so check header cases.
182197
if (parts.Length == 1) {
183198
// Handle weird separator lines which contain random spaces.
184-
foreach (var h in PotentialHeaders) {
185-
var hEsc = Regex.Escape(h);
186-
if (Regex.IsMatch(part, $"^\\s*{hEsc}+(\\s+{hEsc}+)+$")) {
187-
part = Regex.Replace(part, @"\s", h);
199+
foreach (var (regex, replacement) in PotentialHeaders) {
200+
if (regex.IsMatch(part)) {
201+
part = WhitespaceRegex.Replace(part, replacement);
188202
break;
189203
}
190204
}
191205

192206
// Replace ReST style ~~~ header to prevent it being interpreted as a code block
193207
// (an alternative in Markdown to triple backtick blocks).
194-
if (Regex.IsMatch(part, @"^\s*~~~+$")) {
208+
if (TildaHeaderRegex.IsMatch(part)) {
195209
Append(part.Replace('~', '-'));
196210
continue;
197211
}
@@ -200,7 +214,7 @@ private void AppendTextLine(string line) {
200214
// TODO: Handle the rest of these, and the precedence order (which depends on the
201215
// order heading lines are seen, not what the line contains).
202216
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections
203-
if (Regex.IsMatch(part, @"^\s*\+\+\++$")) {
217+
if (PlusHeaderRegex.IsMatch(part)) {
204218
Append(part.Replace('+', '-'));
205219
continue;
206220
}
@@ -211,7 +225,7 @@ private void AppendTextLine(string line) {
211225
// TODO: Replace this with real list parsing. This may have
212226
// false positives and cause random italics when the ReST list
213227
// doesn't match Markdown's specification.
214-
var match = Regex.Match(part, @"^(\s+\* )(.*)$");
228+
var match = LeadingAsteriskRegex.Match(part);
215229
if (match.Success) {
216230
Append(match.Groups[1].Value);
217231
part = match.Groups[2].Value;
@@ -230,7 +244,7 @@ private void AppendTextLine(string line) {
230244
// TODO: Strip footnote/citation references.
231245

232246
// Escape _, *, and ~, but ignore things like ":param \*\*kwargs:".
233-
part = Regex.Replace(part, @"(?<!\\)([_*~])", @"\$1");
247+
part = UnescapedMarkdownCharsRegex.Replace(part, @"\$1");
234248

235249
Append(part);
236250
}
@@ -242,17 +256,25 @@ private void AppendTextLine(string line) {
242256
_builder.AppendLine();
243257
}
244258

259+
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#literal-blocks
260+
private static readonly Regex LiteralBlockEmptyRegex = new Regex(@"^\s*::$", RegexOptions.Singleline | RegexOptions.Compiled);
261+
private static readonly (Regex, string)[] LiteralBlockReplacements = new[] {
262+
(new Regex(@"\s+::$", RegexOptions.Singleline | RegexOptions.Compiled), ""),
263+
(new Regex(@"(\S)\s*::$", RegexOptions.Singleline | RegexOptions.Compiled), "$1:"),
264+
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#interpreted-text
265+
(new Regex(@":[\w_\-+:.]+:`", RegexOptions.Singleline | RegexOptions.Compiled), "`"),
266+
(new Regex(@"`:[\w_\-+:.]+:", RegexOptions.Singleline | RegexOptions.Compiled), "`"),
267+
};
268+
245269
private string PreprocessTextLine(string line) {
246270
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#literal-blocks
247-
if (Regex.IsMatch(line, @"^\s*::$")) {
271+
if (LiteralBlockEmptyRegex.IsMatch(line)) {
248272
return string.Empty;
249273
}
250-
line = Regex.Replace(line, @"\s+::$", "");
251-
line = Regex.Replace(line, @"(\S)\s*::$", "$1:");
252274

253-
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#interpreted-text
254-
line = Regex.Replace(line, @":[\w_\-+:.]+:`", "`");
255-
line = Regex.Replace(line, @"`:[\w_\-+:.]+:", "`");
275+
foreach (var (regex, replacement) in LiteralBlockReplacements) {
276+
line = regex.Replace(line, replacement);
277+
}
256278

257279
line = line.Replace("``", "`");
258280
return line;
@@ -296,8 +318,10 @@ private void ParseBacktickBlock() {
296318
EatLine();
297319
}
298320

321+
private static readonly Regex DoctestRegex = new Regex(@" *>>> ", RegexOptions.Singleline | RegexOptions.Compiled);
322+
299323
private bool BeginDoctest() {
300-
if (!Regex.IsMatch(CurrentLine, @" *>>> ")) {
324+
if (!DoctestRegex.IsMatch(CurrentLine)) {
301325
return false;
302326
}
303327

@@ -387,8 +411,10 @@ private void ParseLiteralBlockSingleLine() {
387411
EatLine();
388412
}
389413

414+
private static readonly Regex SpaceDotDotRegex = new Regex(@"^\s*\.\. ", RegexOptions.Singleline | RegexOptions.Compiled);
415+
390416
private bool BeginDirective() {
391-
if (!Regex.IsMatch(CurrentLine, @"^\s*\.\. ")) {
417+
if (!SpaceDotDotRegex.IsMatch(CurrentLine)) {
392418
return false;
393419
}
394420

@@ -398,10 +424,12 @@ private bool BeginDirective() {
398424
return true;
399425
}
400426

427+
private static readonly Regex DirectiveLikeRegex = new Regex(@"^\s*\.\.\s+(\w+)::\s*(.*)$", RegexOptions.Singleline | RegexOptions.Compiled);
428+
401429
private void ParseDirective() {
402430
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives
403431

404-
var match = Regex.Match(CurrentLine, @"^\s*\.\.\s+(\w+)::\s*(.*)$");
432+
var match = DirectiveLikeRegex.Match(CurrentLine);
405433
if (match.Success) {
406434
var directiveType = match.Groups[1].Value;
407435
var directive = match.Groups[2].Value;

0 commit comments

Comments
 (0)