Skip to content

Commit 649c24d

Browse files
authored
Add KDL lexer (#1192)
Based on the not-yet-merged PR by @chinatsu: pygments/pygments#2936 <img width="790" height="554" alt="image" src="https://github.com/user-attachments/assets/430e48ff-39c0-41d7-942f-0406cb1270be" /> Closes #1166
1 parent 249a634 commit 649c24d

3 files changed

Lines changed: 180 additions & 0 deletions

File tree

lexers/embedded/kdl.xml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<lexer>
2+
<config>
3+
<name>KDL</name>
4+
<alias>kdl</alias>
5+
<filename>*.kdl</filename>
<ensure_nl>true</ensure_nl> <!-- the line-comment and slashdash rules need trailing whitespace or a newline, so guarantee the input ends with one -->
6+
</config>
7+
<rules>
8+
<state name="root">
9+
<rule pattern="((?&lt;={|;)|^)\s*(?![/\\\{\}#;\[\]\=])[&lt;&gt;:\w\-_~,\&#x27;`!\?@\$%^&amp;*+|\.\(\)\x{0080}-\x{0001f645}]+\d*?[&lt;&gt;:\w\-_~,\&#x27;`!\?@\$%^&amp;*+|\.\(\)\x{0080}-\x{0001f645}]*?"><token type="NameLabel"/></rule>
10+
<rule pattern="(#true|#false|#null|#nan|#inf|#-inf)\b"><token type="KeywordConstant"/></rule>
11+
<rule pattern="[{}=;\\]"><token type="Operator"/></rule>
12+
<rule pattern="(\b([0-9-\+]|-|\+)[0-9_]*?\.[0-9][0-9_]*?([eE][+-]?[0-9_]+)?\b|\b[0-9][0-9_]*?(\.[0-9][0-9_]*?)?[eE][+-]?[0-9_]+\b)"><token type="LiteralNumberFloat"/></rule>
13+
<rule pattern="\b[0-9\-\+][0-9_]*\b"><token type="LiteralNumber"/></rule>
14+
<rule pattern="\b0x[a-fA-F0-9][a-fA-F0-9_]*?\b"><token type="LiteralNumberHex"/></rule>
15+
<rule pattern="\b0o[0-7][0-7_]*\b"><token type="LiteralNumberOct"/></rule>
16+
<rule pattern="\b0b[01][01_]*?\b"><token type="LiteralNumberBin"/></rule>
17+
<rule pattern="#+(\&quot;&quot;&quot;|&quot;).*?(&quot;&quot;&quot;|&quot;)#+"><token type="LiteralString"/></rule>
18+
<rule pattern="#?&quot;&quot;&quot;"><token type="LiteralString"/><push state="multiline_string"/></rule>
19+
<rule pattern="#?&quot;"><token type="LiteralString"/><push state="string"/></rule>
20+
<rule pattern="/\*!"><token type="LiteralStringDoc"/><push state="doccomment"/></rule> <!-- must come before the plain /\* rule: rules are tried in order and /\* also matches the start of a doc comment -->
21+
<rule pattern="/\*"><token type="CommentMultiline"/><push state="comment"/></rule>
22+
<rule pattern="/-\s*{"><token type="CommentMultiline"/><push state="slashdash_block_comment"/></rule>
23+
<rule pattern="\s*/-\s?[^\s=]*?\s?{"><token type="CommentMultiline"/><push state="slashdash_node_comment"/></rule>
24+
<rule pattern="(?&lt;!^)\s*/-\s*(&quot;.*&quot;|.*?)?\s"><token type="CommentSingle"/></rule>
25+
<rule pattern="(?&lt;=^)\s*/-[^{]+{"><token type="CommentMultiline"/><push state="slashdash_node_with_children_comment"/></rule>
26+
<rule pattern="(\/\/(.*?)\n|(?&lt;!^)\s*/-\s*?\s)"><token type="CommentSingle"/></rule>
27+
<rule pattern="(?![/\\\{\}#;\[\]\=])[&lt;&gt;:\w\-_~,\&#x27;`!\?@\$%^&amp;*+|.\(\)\x{0080}-\x{0001f645}]+\d*?[&lt;&gt;:\w\-_~,\&#x27;`!\?@\$%^&amp;*+|.\(\)\x{0080}-\x{0001f645}]*(=)"><token type="NameAttribute"/></rule>
28+
<rule pattern="(?![/\\{\}#;\[\]\=])[&lt;&gt;:\w\-_~,\&#x27;`!\?@\$%^&amp;*+|.\(\)\x{0080}-\x{0001f645}]+\d*[&lt;&gt;:\w\-_~,\&#x27;`!\?@\$%^&amp;*+|.\(\)\x{0080}-\x{0001f645}]*?"><token type="LiteralString"/></rule>
29+
<rule pattern="\s"><token type="TextWhitespace"/></rule>
30+
</state>
31+
<state name="string"> <!-- body of a single-line quoted string; pushed from root on an opening quote, popped on the closing quote (optionally followed by #) -->
32+
<rule pattern="&quot;#?"><token type="LiteralString"/><pop depth="1"/></rule>
33+
<rule pattern="\\[&#x27;&quot;\\/bfnrst]|\\x[0-7][0-9a-fA-F]|\\0|\\u\{[0-9a-fA-F]{1,6}\}"><token type="LiteralStringEscape"/></rule> <!-- recognised escapes, including KDL's \b, \f, \s and \/ -->
34+
<rule pattern="[^\\&quot;]+"><token type="LiteralString"/></rule>
35+
<rule pattern="\\"><token type="LiteralString"/></rule> <!-- lone backslash that is not part of a recognised escape -->
36+
</state>
37+
<state name="multiline_string"> <!-- body of a triple-quoted string; pushed from root on three opening quotes, popped on three closing quotes (optionally followed by #) -->
38+
<rule pattern="&quot;&quot;&quot;#?"><token type="LiteralString"/><pop depth="1"/></rule>
39+
<rule pattern="\\[&#x27;&quot;\\/bfnrst]|\\x[0-7][0-9a-fA-F]|\\0|\\u\{[0-9a-fA-F]{1,6}\}"><token type="LiteralStringEscape"/></rule> <!-- recognised escapes, including KDL's \b, \f, \s and \/ -->
40+
<rule pattern="&quot;"><token type="LiteralString"/></rule> <!-- a quote that does not start the closing delimiter is ordinary content -->
41+
<rule pattern="[^\\&quot;]+"><token type="LiteralString"/></rule>
42+
<rule pattern="\\"><token type="LiteralString"/></rule> <!-- lone backslash that is not part of a recognised escape -->
43+
</state>
44+
<state name="slashdash_block_comment"> <!-- body of a slashdash-commented brace block; brace depth is tracked so the comment ends at the matching closing brace, not the first one -->
45+
<rule pattern="[^{}]+"><token type="CommentMultiline"/></rule> <!-- must not consume braces, otherwise the push rule below can never match -->
46+
<rule pattern="\{"><token type="CommentMultiline"/><push/></rule> <!-- any nested opening brace adds one level -->
47+
<rule pattern="\}"><token type="CommentMultiline"/><pop depth="1"/></rule>
49+
</state>
50+
<state name="slashdash_node_comment"> <!-- children block that follows a slashdash-commented item; brace depth is tracked so the comment ends at the matching closing brace -->
51+
<rule pattern="[^{}]+"><token type="CommentMultiline"/></rule> <!-- must not consume braces, otherwise the push rule below can never match -->
52+
<rule pattern="\{"><token type="CommentMultiline"/><push/></rule> <!-- any nested opening brace adds one level -->
53+
<rule pattern="\}"><token type="CommentMultiline"/><pop depth="1"/></rule>
55+
</state>
56+
<state name="slashdash_node_with_children_comment"> <!-- children block of a node commented out with a slashdash at the start of a line; brace depth is tracked so the comment ends at the matching closing brace -->
57+
<rule pattern="[^{}]+"><token type="CommentMultiline"/></rule> <!-- must not consume braces, otherwise the push rule below can never match -->
58+
<rule pattern="\{"><token type="CommentMultiline"/><push/></rule> <!-- any nested opening brace adds one level -->
59+
<rule pattern="\}"><token type="CommentMultiline"/><pop depth="1"/></rule>
61+
</state>
62+
<state name="comment"> <!-- body of a block comment; an inner /* pushes this state again and each */ pops one level, so nested comments must balance -->
63+
<rule pattern="[^*/]+"><token type="CommentMultiline"/></rule>
64+
<rule pattern="/\*"><token type="CommentMultiline"/><push/></rule>
65+
<rule pattern="\*/"><token type="CommentMultiline"/><pop depth="1"/></rule>
66+
<rule pattern="[*/]"><token type="CommentMultiline"/></rule> <!-- a lone * or / that is not part of a delimiter -->
67+
</state>
68+
<state name="doccomment"> <!-- body of a doc comment, pushed by the root rule for /*!; same nesting scheme as the comment state but tokens are emitted as LiteralStringDoc -->
69+
<rule pattern="[^*/]+"><token type="LiteralStringDoc"/></rule>
70+
<rule pattern="/\*"><token type="LiteralStringDoc"/><push/></rule>
71+
<rule pattern="\*/"><token type="LiteralStringDoc"/><pop depth="1"/></rule>
72+
<rule pattern="[*/]"><token type="LiteralStringDoc"/></rule> <!-- a lone * or / that is not part of a delimiter -->
73+
</state>
74+
</rules>
75+
</lexer>

lexers/testdata/kdl.actual

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/- {
2+
a
3+
}
4+
5+
a; b; c;
6+
7+
cool /-a="thing" plus="something else" {
8+
stuff
9+
}
10+
11+
a 12.42e8
12+
13+
// Nodes can be separated into multiple lines
14+
title \
15+
"Some title"
16+
17+
18+
// Files must be utf8 encoded!
19+
smile 😁
20+
21+
// Node names and property keys are just strings, so you can write them like
22+
// quoted or raw strings, too!
23+
"illegal(){}[]/\\=#;identifier" #"1.2.3"# "#false"=#true
24+
25+
// Identifiers are very flexible. The following is a legal bare identifier:
26+
-<123~!$@%^&*,.:'`|?+>
27+
28+
// And you can also use non-ASCII unicode!
29+
ノード お名前=ฅ^•ﻌ•^ฅ
30+
31+
// kdl specifically allows properties and values to be
32+
// interspersed with each other, much like CLI commands.
33+
foo bar=#true baz quux=#false 1 2 3

lexers/testdata/kdl.expected

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
[
2+
{"type":"CommentMultiline","value":"/- {\n a\n}"},
3+
{"type":"TextWhitespace","value":"\n"},
4+
{"type":"NameLabel","value":"\na"},
5+
{"type":"Operator","value":";"},
6+
{"type":"NameLabel","value":" b"},
7+
{"type":"Operator","value":";"},
8+
{"type":"NameLabel","value":" c"},
9+
{"type":"Operator","value":";"},
10+
{"type":"NameLabel","value":"\n\ncool"},
11+
{"type":"CommentSingle","value":" /-a=\"thing\" "},
12+
{"type":"NameAttribute","value":"plus="},
13+
{"type":"LiteralString","value":"\"something else\""},
14+
{"type":"TextWhitespace","value":" "},
15+
{"type":"Operator","value":"{"},
16+
{"type":"NameLabel","value":"\n stuff"},
17+
{"type":"TextWhitespace","value":"\n"},
18+
{"type":"Operator","value":"}"},
19+
{"type":"TextWhitespace","value":"\n"},
20+
{"type":"NameLabel","value":"\na"},
21+
{"type":"TextWhitespace","value":" "},
22+
{"type":"LiteralNumberFloat","value":"12.42e8"},
23+
{"type":"TextWhitespace","value":"\n\n"},
24+
{"type":"CommentSingle","value":"// Nodes can be separated into multiple lines\n"},
25+
{"type":"NameLabel","value":"title"},
26+
{"type":"TextWhitespace","value":" "},
27+
{"type":"Operator","value":"\\"},
28+
{"type":"TextWhitespace","value":"\n "},
29+
{"type":"LiteralString","value":"\"Some title\""},
30+
{"type":"TextWhitespace","value":"\n\n\n"},
31+
{"type":"CommentSingle","value":"// Files must be utf8 encoded!\n"},
32+
{"type":"NameLabel","value":"smile"},
33+
{"type":"TextWhitespace","value":" "},
34+
{"type":"LiteralString","value":"😁"},
35+
{"type":"TextWhitespace","value":"\n\n"},
36+
{"type":"CommentSingle","value":"// Node names and property keys are just strings, so you can write them like\n// quoted or raw strings, too!\n"},
37+
{"type":"LiteralString","value":"\"illegal(){}[]/"},
38+
{"type":"LiteralStringEscape","value":"\\\\"},
39+
{"type":"LiteralString","value":"=#;identifier\""},
40+
{"type":"TextWhitespace","value":" "},
41+
{"type":"LiteralString","value":"#\"1.2.3\"#"},
42+
{"type":"TextWhitespace","value":" "},
43+
{"type":"LiteralString","value":"\"#false\""},
44+
{"type":"Operator","value":"="},
45+
{"type":"KeywordConstant","value":"#true"},
46+
{"type":"TextWhitespace","value":"\n\n"},
47+
{"type":"CommentSingle","value":"// Identifiers are very flexible. The following is a legal bare identifier:\n"},
48+
{"type":"NameLabel","value":"-\u003c123~!$@%^\u0026*,.:'`|?+\u003e"},
49+
{"type":"TextWhitespace","value":"\n\n"},
50+
{"type":"CommentSingle","value":"// And you can also use non-ASCII unicode!\n"},
51+
{"type":"NameLabel","value":"ノード お名前"},
52+
{"type":"Operator","value":"="},
53+
{"type":"LiteralString","value":"ฅ^•ﻌ•^ฅ"},
54+
{"type":"TextWhitespace","value":"\n\n"},
55+
{"type":"CommentSingle","value":"// kdl specifically allows properties and values to be\n// interspersed with each other, much like CLI commands.\n"},
56+
{"type":"NameLabel","value":"foo"},
57+
{"type":"TextWhitespace","value":" "},
58+
{"type":"NameAttribute","value":"bar="},
59+
{"type":"KeywordConstant","value":"#true"},
60+
{"type":"TextWhitespace","value":" "},
61+
{"type":"LiteralString","value":"baz"},
62+
{"type":"TextWhitespace","value":" "},
63+
{"type":"NameAttribute","value":"quux="},
64+
{"type":"KeywordConstant","value":"#false"},
65+
{"type":"TextWhitespace","value":" "},
66+
{"type":"LiteralNumber","value":"1"},
67+
{"type":"TextWhitespace","value":" "},
68+
{"type":"LiteralNumber","value":"2"},
69+
{"type":"TextWhitespace","value":" "},
70+
{"type":"LiteralNumber","value":"3"},
71+
{"type":"TextWhitespace","value":"\n"}
72+
]

0 commit comments

Comments
 (0)