@@ -4,6 +4,8 @@ use regex::Regex;
44use ruff_python_ast:: PySourceType ;
55use ruff_python_formatter:: format_module_source;
66use ruff_python_trivia:: textwrap:: { dedent, indent} ;
7+ use ruff_source_file:: { Line , UniversalNewlines } ;
8+ use ruff_text_size:: { TextRange , TextSize } ;
79use ruff_workspace:: FormatterSettings ;
810
911#[ derive( Debug , PartialEq , Eq ) ]
@@ -12,67 +14,115 @@ pub enum MarkdownResult {
1214 Unchanged ,
1315}
1416
15- // TODO: account for ~~~ and arbitrary length code fences
1617// TODO: support code blocks nested inside block quotes, etc
17- static MARKDOWN_CODE_BLOCK : LazyLock < Regex > = LazyLock :: new ( || {
18- // adapted from blacken-docs
19- // https://github.com/adamchainz/blacken-docs/blob/fb107c1dce25f9206e29297aaa1ed7afc2980a5a/src/blacken_docs/__init__.py#L17
18+ static MARKDOWN_CODE_FENCE : LazyLock < Regex > = LazyLock :: new ( || {
2019 Regex :: new (
21- r"(?imsx)
22- (?<before>
23- ^(?<indent>\ *)```[^\S\r\n]*
24- (?<lang>(?:python|py|python3|py3|pyi)?)
25- (?:\ .*?)?\n
26- )
27- (?<code>.*?)
28- (?<after>
29- ^\ *```[^\S\r\n]*$
30- )
31- " ,
20+ r"(?ix)
21+ ^
22+ (?<indent>\s*)
23+ (?<fence>(?:```+|~~~+))\s*
24+ (?<language>(?:\w+)?)\s*
25+ (?<info>(?:.*))\s*
26+ $
27+ " ,
3228 )
3329 . unwrap ( )
3430} ) ;
3531
32+ static OFF_ON_DIRECTIVES : LazyLock < Regex > = LazyLock :: new ( || {
33+ Regex :: new (
34+ r"(?imx)
35+ ^
36+ \s*<!--\s*(?:blacken-docs|fmt)\s*:\s*(?<action>off|on)\s*-->
37+ " ,
38+ )
39+ . unwrap ( )
40+ } ) ;
41+
/// Whether code blocks encountered while scanning a document should be formatted.
///
/// This is a fieldless two-variant flag, so it derives `Clone` and `Copy`: values can be
/// read and reassigned freely without moving the current state around.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum MarkdownState {
    /// Code blocks are formatted. This is the default state.
    #[default]
    On,
    /// Code blocks are left untouched.
    Off,
}
48+
3649pub fn format_code_blocks (
3750 source : & str ,
3851 path : Option < & Path > ,
3952 settings : & FormatterSettings ,
4053) -> MarkdownResult {
54+ let mut state = MarkdownState :: On ;
4155 let mut changed = false ;
4256 let mut formatted = String :: with_capacity ( source. len ( ) ) ;
43- let mut last_match = 0 ;
57+ let mut last_match = TextSize :: new ( 0 ) ;
4458
45- for capture in MARKDOWN_CODE_BLOCK . captures_iter ( source) {
46- let ( _, [ before, code_indent, language, code, after] ) = capture. extract ( ) ;
59+ let mut lines = source. universal_newlines ( ) . peekable ( ) ;
60+ while let Some ( line) = lines. next ( ) {
61+ // Toggle code block formatting off/on
62+ if let Some ( capture) = OFF_ON_DIRECTIVES . captures ( & line) {
63+ let ( _, [ action] ) = capture. extract ( ) ;
64+ state = match action {
65+ "off" => MarkdownState :: Off ,
66+ "on" => MarkdownState :: On ,
67+ _ => state,
68+ } ;
69+ // Process code blocks
70+ } else if let Some ( opening_capture) = MARKDOWN_CODE_FENCE . captures ( & line) {
71+ let ( _, [ code_indent, opening_fence, language, _info] ) = opening_capture. extract ( ) ;
72+ let start = lines. peek ( ) . map ( Line :: start) . unwrap_or_default ( ) ;
4773
48- let py_source_type = PySourceType :: from_extension ( language) ;
49- let unformatted_code = dedent ( code) ;
50- let options = settings. to_format_options ( py_source_type, & unformatted_code, path) ;
74+ // Consume lines until reaching the matching/ending code fence
75+ for code_line in lines. by_ref ( ) {
76+ let Some ( ( _, [ _, closing_fence, _, _] ) ) = MARKDOWN_CODE_FENCE
77+ . captures ( & code_line)
78+ . map ( |cap| cap. extract ( ) )
79+ else {
80+ continue ;
81+ } ;
5182
52- // Using `Printed::into_code` requires adding `ruff_formatter` as a direct dependency, and I suspect that Rust can optimize the closure away regardless.
53- #[ expect( clippy:: redundant_closure_for_method_calls) ]
54- let formatted_code =
55- format_module_source ( & unformatted_code, options) . map ( |formatted| formatted. into_code ( ) ) ;
83+ // Found the matching end of the code block
84+ if closing_fence == opening_fence {
85+ let language = language. to_ascii_lowercase ( ) ;
86+ if state == MarkdownState :: On
87+ && matches ! (
88+ language. as_str( ) ,
89+ "python" | "py" | "python3" | "py3" | "pyi" | ""
90+ )
91+ {
92+ // Maybe python, try formatting it
93+ let end = code_line. start ( ) ;
94+ let unformatted_code = dedent ( & source[ TextRange :: new ( start, end) ] ) ;
5695
57- if let Ok ( formatted_code) = formatted_code {
58- if formatted_code. len ( ) != unformatted_code. len ( ) || formatted_code != * unformatted_code
59- {
60- let m = capture. get_match ( ) ;
61- formatted. push_str ( & source[ last_match..m. start ( ) ] ) ;
96+ let py_source_type = PySourceType :: from_extension ( & language) ;
97+ let options =
98+ settings. to_format_options ( py_source_type, & unformatted_code, path) ;
6299
63- let indented_code = indent ( & formatted_code, code_indent) ;
64- // otherwise I need to deal with a result from write!
65- #[ expect( clippy:: format_push_string) ]
66- formatted. push_str ( & format ! ( "{before}{indented_code}{after}" ) ) ;
100+ // Using `Printed::into_code` requires adding `ruff_formatter` as a direct
101+ // dependency, and I suspect that Rust can optimize the closure away regardless.
102+ #[ expect( clippy:: redundant_closure_for_method_calls) ]
103+ let formatted_code = format_module_source ( & unformatted_code, options)
104+ . map ( |formatted| formatted. into_code ( ) ) ;
67105
68- last_match = m. end ( ) ;
69- changed = true ;
106+ // Formatting produced changes
107+ if let Ok ( formatted_code) = formatted_code
108+ && ( formatted_code. len ( ) != unformatted_code. len ( )
109+ || formatted_code != * unformatted_code)
110+ {
111+ formatted. push_str ( & source[ TextRange :: new ( last_match, start) ] ) ;
112+ let formatted_code = indent ( & formatted_code, code_indent) ;
113+ formatted. push_str ( & formatted_code) ;
114+ last_match = end;
115+ changed = true ;
116+ }
117+ }
118+ break ;
119+ }
70120 }
71121 }
72122 }
73123
74124 if changed {
75- formatted. push_str ( & source[ last_match..] ) ;
125+ formatted. push_str ( & source[ last_match. to_usize ( ) . .] ) ;
76126 MarkdownResult :: Formatted ( formatted)
77127 } else {
78128 MarkdownResult :: Unchanged
@@ -187,4 +237,151 @@ fn (foo: &str) -> &str {
187237 format_code_blocks( code, None , & FormatterSettings :: default ( ) ) ,
188238 @"Unchanged" ) ;
189239 }
240+
// A `py` block delimited by tilde (`~~~`) fences is formatted: the snapshot expects
// `print( 'hello' )` to become `print("hello")` with the fences left as written.
241+ #[ test]
242+ fn format_code_blocks_tildes ( ) {
243+ let code = r#"
244+ ~~~py
245+ print( 'hello' )
246+ ~~~
247+ "# ;
248+ assert_snapshot ! (
249+ format_code_blocks( code, None , & FormatterSettings :: default ( ) ) ,
250+ @r#"
251+ ~~~py
252+ print("hello")
253+ ~~~
254+ "# ) ;
255+ }
256+
// Fences longer than three characters are handled: a four-backtick block and a
// five-tilde block, both tagged `py`, are each expected to be formatted.
257+ #[ test]
258+ fn format_code_blocks_long_fence ( ) {
259+ let code = r#"
260+ ````py
261+ print( 'hello' )
262+ ````
263+ ~~~~~py
264+ print( 'hello' )
265+ ~~~~~
266+ "# ;
267+ assert_snapshot ! (
268+ format_code_blocks( code, None , & FormatterSettings :: default ( ) ) ,
269+ @r#"
270+ ````py
271+ print("hello")
272+ ````
273+ ~~~~~py
274+ print("hello")
275+ ~~~~~
276+ "# ) ;
277+ }
278+
// A three-backtick `py` block nested inside a four-backtick `markdown` block belongs to
// the outer block's content, so the document is expected to come back `Unchanged`.
279+ #[ test]
280+ fn format_code_blocks_nested ( ) {
281+ let code = r#"
282+ ````markdown
283+ ```py
284+ print( 'hello' )
285+ ```
286+ ````
287+ "# ;
288+ assert_snapshot ! (
289+ format_code_blocks( code, None , & FormatterSettings :: default ( ) ) ,
290+ @"Unchanged" ) ;
291+ }
292+
// `<!-- blacken-docs:off -->` / `<!-- blacken-docs:on -->` directives bracket the middle
// of three `py` blocks: the snapshot expects the first and last block to be formatted
// and the bracketed one to stay as written.
293+ #[ test]
294+ fn format_code_blocks_ignore_blackendocs_off ( ) {
295+ let code = r#"
296+ ```py
297+ print( 'hello' )
298+ ```
299+
300+ <!-- blacken-docs:off -->
301+ ```py
302+ print( 'hello' )
303+ ```
304+ <!-- blacken-docs:on -->
305+
306+ ```py
307+ print( 'hello' )
308+ ```
309+ "# ;
310+ assert_snapshot ! ( format_code_blocks(
311+ code,
312+ None ,
313+ & FormatterSettings :: default ( )
314+ ) , @r#"
315+ ```py
316+ print("hello")
317+ ```
318+
319+ <!-- blacken-docs:off -->
320+ ```py
321+ print( 'hello' )
322+ ```
323+ <!-- blacken-docs:on -->
324+
325+ ```py
326+ print("hello")
327+ ```
328+ "# ) ;
329+ }
330+
// Same scenario as the `blacken-docs` directive test, using the `<!-- fmt:off -->` /
// `<!-- fmt:on -->` spelling: only the block between the two directives is expected to
// stay unformatted.
331+ #[ test]
332+ fn format_code_blocks_ignore_ruff_off ( ) {
333+ let code = r#"
334+ ```py
335+ print( 'hello' )
336+ ```
337+
338+ <!-- fmt:off -->
339+ ```py
340+ print( 'hello' )
341+ ```
342+ <!-- fmt:on -->
343+
344+ ```py
345+ print( 'hello' )
346+ ```
347+ "# ;
348+ assert_snapshot ! ( format_code_blocks(
349+ code,
350+ None ,
351+ & FormatterSettings :: default ( )
352+ ) , @r#"
353+ ```py
354+ print("hello")
355+ ```
356+
357+ <!-- fmt:off -->
358+ ```py
359+ print( 'hello' )
360+ ```
361+ <!-- fmt:on -->
362+
363+ ```py
364+ print("hello")
365+ ```
366+ "# ) ;
367+ }
368+
// A `<!-- fmt:off -->` directive with no later `fmt:on` stays in effect for the rest of
// the document: both `py` blocks after it are expected to be left alone (`Unchanged`).
369+ #[ test]
370+ fn format_code_blocks_ignore_to_end ( ) {
371+ let code = r#"
372+ <!-- fmt:off -->
373+ ```py
374+ print( 'hello' )
375+ ```
376+
377+ ```py
378+ print( 'hello' )
379+ ```
380+ "# ;
381+ assert_snapshot ! ( format_code_blocks(
382+ code,
383+ None ,
384+ & FormatterSettings :: default ( )
385+ ) , @"Unchanged" ) ;
386+ }
190387}
0 commit comments