23
23
24
24
namespace Microsoft . Python . LanguageServer . Documentation {
25
25
internal class DocstringConverter {
26
- private static readonly string [ ] PotentialHeaders = new [ ] { "=" , "-" , "~" , "+" } ;
27
-
28
26
/// <summary>
29
27
/// Converts a docstring to a plaintext, human readable form. This will
30
28
/// first strip any common leading indentation (like inspect.cleandoc),
@@ -55,7 +53,7 @@ public static string ToPlaintext(string docstring) {
55
53
/// <returns>The converted docstring, with Environment.NewLine line endings.</returns>
56
54
public static string ToMarkdown(string docstring) {
    // Every call builds its own converter: the output buffer and parser
    // state are instance fields, so nothing is shared between conversions.
    var converter = new DocstringConverter(docstring);
    return converter.Convert();
}
57
55
58
- private readonly StringBuilder _builder = new StringBuilder ( ) ;
56
+ private readonly StringBuilder _builder ;
59
57
private bool _skipAppendEmptyLine = true ;
60
58
private bool _insideInlineCode = false ;
61
59
private bool _appendDirectiveBlock = false ;
@@ -79,6 +77,7 @@ private int NextBlockIndent
79
77
private string CurrentLineWithinBlock => CurrentLine . Substring ( _blockIndent ) ;
80
78
81
79
/// <summary>
/// Creates a converter for one docstring: presizes the output buffer,
/// selects <c>ParseText</c> as the initial parser state, and splits the
/// input into lines.
/// </summary>
/// <param name="input">The raw docstring text to convert.</param>
private DocstringConverter(string input) {
    // The input length is only a capacity hint for the output buffer, so it
    // must not be the statement that throws when input is null. Null handling
    // stays with SplitDocstring (not visible here), exactly as it was when the
    // builder was created without a capacity.
    _builder = new StringBuilder(input?.Length ?? 0);
    _state = ParseText;
    _lines = SplitDocstring(input);
}
@@ -153,12 +152,28 @@ private void ParseText() {
153
152
EatLine ( ) ;
154
153
}
155
154
155
// Patterns used by AppendTextLine, built once per process instead of being
// re-parsed on every call.

/// <summary>
/// Matches a line that begins (after optional whitespace) with one of the
/// listed field directives, e.g. <c>:param</c> or <c>:rtype</c>.
/// </summary>
private static readonly Regex DirectivesExtraNewlineRegex = new Regex(
    @"^\s*:(param|arg|type|return|rtype|raise|except|var|ivar|cvar|copyright|license)",
    RegexOptions.Singleline | RegexOptions.Compiled
);

/// <summary>
/// Separator lines made of runs of a single character split by whitespace
/// (for example <c>=== ===</c>), each paired with the character that is
/// substituted for the whitespace. A line with no internal whitespace does
/// not match any of these patterns.
/// </summary>
private static readonly (Regex Pattern, string Fill)[] PotentialHeaders = {
    (
        new Regex(@"^\s*=+(\s+=+)+$", RegexOptions.Singleline | RegexOptions.Compiled),
        "="
    ),
    (
        new Regex(@"^\s*-+(\s+-+)+$", RegexOptions.Singleline | RegexOptions.Compiled),
        "-"
    ),
    (
        new Regex(@"^\s*~+(\s+~+)+$", RegexOptions.Singleline | RegexOptions.Compiled),
        "~"
    ),
    (
        new Regex(@"^\s*\++(\s+\++)+$", RegexOptions.Singleline | RegexOptions.Compiled),
        "+"
    ),
};

/// <summary>Matches a single whitespace character.</summary>
private static readonly Regex WhitespaceRegex = new Regex(
    @"\s",
    RegexOptions.Singleline | RegexOptions.Compiled
);

/// <summary>Matches a line consisting of three or more <c>~</c> after optional leading whitespace.</summary>
private static readonly Regex TildaHeaderRegex = new Regex(
    @"^\s*~~~+$",
    RegexOptions.Singleline | RegexOptions.Compiled
);

/// <summary>Matches a line consisting of three or more <c>+</c> after optional leading whitespace.</summary>
private static readonly Regex PlusHeaderRegex = new Regex(
    @"^\s*\+\+\++$",
    RegexOptions.Singleline | RegexOptions.Compiled
);

/// <summary>
/// Matches an indented <c>* </c> bullet. Group 1 is the indent plus the
/// bullet marker, group 2 is the remainder of the line.
/// </summary>
private static readonly Regex LeadingAsteriskRegex = new Regex(
    @"^(\s+\* )(.*)$",
    RegexOptions.Singleline | RegexOptions.Compiled
);

/// <summary>
/// Matches <c>_</c>, <c>*</c> or <c>~</c> when not immediately preceded by
/// a backslash; the matched character is captured in group 1.
/// </summary>
private static readonly Regex UnescapedMarkdownCharsRegex = new Regex(
    @"(?<!\\)([_*~])",
    RegexOptions.Singleline | RegexOptions.Compiled
);
170
+
156
171
private void AppendTextLine ( string line ) {
157
172
line = PreprocessTextLine ( line ) ;
158
173
159
174
// Hack: attempt to put directives lines into their own paragraphs.
160
175
// This should be removed once proper list-like parsing is written.
161
- if ( ! _insideInlineCode && Regex . IsMatch ( line , @"^\s*:(param|arg|type|return|rtype|raise|except|var|ivar|cvar|copyright|license)" ) ) {
176
+ if ( ! _insideInlineCode && DirectivesExtraNewlineRegex . IsMatch ( line ) ) {
162
177
AppendLine ( ) ;
163
178
}
164
179
@@ -181,17 +196,16 @@ private void AppendTextLine(string line) {
181
196
// Only one part, and not inside code, so check header cases.
182
197
if ( parts . Length == 1 ) {
183
198
// Handle weird separator lines which contain random spaces.
184
- foreach ( var h in PotentialHeaders ) {
185
- var hEsc = Regex . Escape ( h ) ;
186
- if ( Regex . IsMatch ( part , $ "^\\ s*{ hEsc } +(\\ s+{ hEsc } +)+$") ) {
187
- part = Regex . Replace ( part , @"\s" , h ) ;
199
+ foreach ( var ( regex , replacement ) in PotentialHeaders ) {
200
+ if ( regex . IsMatch ( part ) ) {
201
+ part = WhitespaceRegex . Replace ( part , replacement ) ;
188
202
break ;
189
203
}
190
204
}
191
205
192
206
// Replace ReST style ~~~ header to prevent it being interpreted as a code block
193
207
// (an alternative in Markdown to triple backtick blocks).
194
- if ( Regex . IsMatch ( part , @"^\s*~~~+$" ) ) {
208
+ if ( TildaHeaderRegex . IsMatch ( part ) ) {
195
209
Append ( part . Replace ( '~' , '-' ) ) ;
196
210
continue ;
197
211
}
@@ -200,7 +214,7 @@ private void AppendTextLine(string line) {
200
214
// TODO: Handle the rest of these, and the precedence order (which depends on the
201
215
// order heading lines are seen, not what the line contains).
202
216
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections
203
- if ( Regex . IsMatch ( part , @"^\s*\+\+\++$" ) ) {
217
+ if ( PlusHeaderRegex . IsMatch ( part ) ) {
204
218
Append ( part . Replace ( '+' , '-' ) ) ;
205
219
continue ;
206
220
}
@@ -211,7 +225,7 @@ private void AppendTextLine(string line) {
211
225
// TODO: Replace this with real list parsing. This may have
212
226
// false positives and cause random italics when the ReST list
213
227
// doesn't match Markdown's specification.
214
- var match = Regex . Match ( part , @"^(\s+\* )(.*)$" ) ;
228
+ var match = LeadingAsteriskRegex . Match ( part ) ;
215
229
if ( match . Success ) {
216
230
Append ( match . Groups [ 1 ] . Value ) ;
217
231
part = match . Groups [ 2 ] . Value ;
@@ -230,7 +244,7 @@ private void AppendTextLine(string line) {
230
244
// TODO: Strip footnote/citation references.
231
245
232
246
// Escape _, *, and ~, but ignore things like ":param \*\*kwargs:".
233
- part = Regex . Replace ( part , @"(?<!\\)([_*~])" , @"\$1" ) ;
247
+ part = UnescapedMarkdownCharsRegex . Replace ( part , @"\$1" ) ;
234
248
235
249
Append ( part ) ;
236
250
}
@@ -242,17 +256,25 @@ private void AppendTextLine(string line) {
242
256
_builder . AppendLine ( ) ;
243
257
}
244
258
259
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#literal-blocks

/// <summary>Matches a line that holds nothing but <c>::</c> after optional whitespace.</summary>
private static readonly Regex LiteralBlockEmptyRegex = new Regex(
    @"^\s*::$",
    RegexOptions.Singleline | RegexOptions.Compiled
);

/// <summary>
/// Pattern/replacement pairs applied to a text line in array order.
/// The first two rewrite a trailing <c>::</c> marker; the last two remove
/// <c>:role:</c> markers that sit directly before or after a backtick.
/// </summary>
private static readonly (Regex Pattern, string Replacement)[] LiteralBlockReplacements = {
    // Whitespace followed by "::" at the end of the line is removed entirely.
    (
        new Regex(@"\s+::$", RegexOptions.Singleline | RegexOptions.Compiled),
        ""
    ),
    // "text::" at the end of the line keeps a single colon.
    (
        new Regex(@"(\S)\s*::$", RegexOptions.Singleline | RegexOptions.Compiled),
        "$1:"
    ),
    // http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#interpreted-text
    (
        new Regex(@":[\w_\-+:.]+:`", RegexOptions.Singleline | RegexOptions.Compiled),
        "`"
    ),
    (
        new Regex(@"`:[\w_\-+:.]+:", RegexOptions.Singleline | RegexOptions.Compiled),
        "`"
    ),
};
268
+
245
269
private string PreprocessTextLine ( string line ) {
246
270
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#literal-blocks
247
- if ( Regex . IsMatch ( line , @"^\s*::$" ) ) {
271
+ if ( LiteralBlockEmptyRegex . IsMatch ( line ) ) {
248
272
return string . Empty ;
249
273
}
250
- line = Regex . Replace ( line , @"\s+::$" , "" ) ;
251
- line = Regex . Replace ( line , @"(\S)\s*::$" , "$1:" ) ;
252
274
253
- // http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#interpreted-text
254
- line = Regex . Replace ( line , @":[\w_\-+:.]+:`" , "`" ) ;
255
- line = Regex . Replace ( line , @"`:[\w_\-+:.]+:" , "`" ) ;
275
+ foreach ( var ( regex , replacement ) in LiteralBlockReplacements ) {
276
+ line = regex . Replace ( line , replacement ) ;
277
+ }
256
278
257
279
line = line . Replace ( "``" , "`" ) ;
258
280
return line ;
@@ -296,8 +318,10 @@ private void ParseBacktickBlock() {
296
318
EatLine ( ) ;
297
319
}
298
320
321
/// <summary>
/// Detects a doctest prompt: any number of spaces followed by
/// <c>&gt;&gt;&gt;</c> and a space. The pattern is not anchored, so it
/// matches a prompt at any position in the line.
/// </summary>
private static readonly Regex DoctestRegex = new Regex(
    @" *>>> ",
    RegexOptions.Compiled | RegexOptions.Singleline
);
322
+
299
323
private bool BeginDoctest ( ) {
300
- if ( ! Regex . IsMatch ( CurrentLine , @" *>>> " ) ) {
324
+ if ( ! DoctestRegex . IsMatch ( CurrentLine ) ) {
301
325
return false ;
302
326
}
303
327
@@ -387,8 +411,10 @@ private void ParseLiteralBlockSingleLine() {
387
411
EatLine ( ) ;
388
412
}
389
413
414
/// <summary>
/// Matches a line that starts, after optional whitespace, with two dots
/// followed by a space (<c>.. </c>).
/// </summary>
private static readonly Regex SpaceDotDotRegex = new Regex(
    @"^\s*\.\. ",
    RegexOptions.Compiled | RegexOptions.Singleline
);
415
+
390
416
private bool BeginDirective ( ) {
391
- if ( ! Regex . IsMatch ( CurrentLine , @"^\s*\.\. " ) ) {
417
+ if ( ! SpaceDotDotRegex . IsMatch ( CurrentLine ) ) {
392
418
return false ;
393
419
}
394
420
@@ -398,10 +424,12 @@ private bool BeginDirective() {
398
424
return true ;
399
425
}
400
426
427
/// <summary>
/// Matches a <c>.. name:: rest</c> line. Group 1 captures the word before
/// the double colon and group 2 captures whatever follows it, with the
/// whitespace right after the double colon skipped.
/// </summary>
private static readonly Regex DirectiveLikeRegex = new Regex(
    @"^\s*\.\.\s+(\w+)::\s*(.*)$",
    RegexOptions.Compiled | RegexOptions.Singleline
);
428
+
401
429
private void ParseDirective ( ) {
402
430
// http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives
403
431
404
- var match = Regex . Match ( CurrentLine , @"^\s*\.\.\s+(\w+)::\s*(.*)$" ) ;
432
+ var match = DirectiveLikeRegex . Match ( CurrentLine ) ;
405
433
if ( match . Success ) {
406
434
var directiveType = match . Groups [ 1 ] . Value ;
407
435
var directive = match . Groups [ 2 ] . Value ;
0 commit comments