Skip to content

Commit f3be4c6

Browse files
Create lexer for Gemtext (#1102)
1 parent a0c6dff commit f3be4c6

File tree

3 files changed

+259
-0
lines changed

3 files changed

+259
-0
lines changed

lexers/gemtext.go

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
package lexers
2+
3+
import (
4+
. "github.com/alecthomas/chroma/v2" // nolint
5+
)
6+
7+
// Gemtext lexer.
8+
var Gemtext = Register(MustNewLexer(
9+
&Config{
10+
Name: "Gemtext",
11+
Aliases: []string{"gemtext", "gmi", "gmni", "gemini"},
12+
Filenames: []string{"*.gmi", "*.gmni", "*.gemini"},
13+
MimeTypes: []string{"text/gemini"},
14+
},
15+
gemtextRules,
16+
))
17+
18+
func gemtextRules() Rules {
19+
return Rules{
20+
"root": {
21+
{`^(#[^#].+\r?\n)`, ByGroups(GenericHeading), nil},
22+
{`^(#{2,3}.+\r?\n)`, ByGroups(GenericSubheading), nil},
23+
{`^(\* )(.+\r?\n)`, ByGroups(Keyword, Text), nil},
24+
{`^(>)(.+\r?\n)`, ByGroups(Keyword, GenericEmph), nil},
25+
{"^(```\\r?\\n)([\\w\\W]*?)(^```)(.+\\r?\\n)?", ByGroups(String, Text, String, Comment), nil},
26+
{
27+
"^(```)(\\w+)(\\r?\\n)([\\w\\W]*?)(^```)(.+\\r?\\n)?",
28+
UsingByGroup(2, 4, String, String, String, Text, String, Comment),
29+
nil,
30+
},
31+
{"^(```)(.+\\r?\\n)([\\w\\W]*?)(^```)(.+\\r?\\n)?", ByGroups(String, String, Text, String, Comment), nil},
32+
{`^(=>)(\s*)([^\s]+)(\s*)$`, ByGroups(Keyword, Text, NameAttribute, Text), nil},
33+
{`^(=>)(\s*)([^\s]+)(\s+)(.+)$`, ByGroups(Keyword, Text, NameAttribute, Text, NameTag), nil},
34+
{`(.|(?:\r?\n))`, ByGroups(Text), nil},
35+
},
36+
}
37+
}

lexers/testdata/gemtext.actual

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,90 @@
1+
# This is a heading!
2+
3+
#This is also a heading!
4+
5+
This is some text. It is not special.
6+
7+
## Another heading
8+
9+
##yes, leading spaces are optional
10+
11+
=> gemini://geminiprotocol.net/docs/gemtext-specification.gmi
12+
13+
=> gemini://geminiprotocol.net/docs/gemtext-specification.gmi Here is a friendly link to the Gemtext specification
14+
15+
=> https://geminiprotocol.net/docs/gemtext-specification.gmi gemini protocol isn't special for links
16+
17+
=> gemini://geminiprotocol.net/ multiple kinds of whitespace
18+
19+
=> gemini://geminiprotocol.net/ multiple whitespace between parts!
20+
21+
=> /foo/bar/baz.txt leading slash still counts
22+
23+
Links may also be closer together. Here are some examples from the spec:
24+
=> gemini://example.org/
25+
=> gemini://example.org/ An example link
26+
=> gemini://example.org/foo Another example link at the same host
27+
=> foo/bar/baz.txt A relative link
28+
=> gopher://example.org:70/1 A gopher link
29+
30+
=>this shouldn't be a link, but it is.
31+
=>gemini://example.org/ the leading space is optional
32+
33+
### Why not try lists?
34+
35+
36+
###spaces are still optional
37+
38+
* This is a list item.
39+
* Here's another.
40+
* Still a list item!
41+
*This is not.
42+
** This is also not.
43+
* Neither is this.
44+
- Not a list item.
45+
- Not a list item.
46+
47+
> Someone said this.
48+
49+
>Someone also said this.
50+
51+
```
52+
This text is preformatted.
53+
54+
# Hello, world!
55+
56+
This isn't to be treated as Gemtext:
57+
=> gemini://example.com
58+
59+
> no one said this
60+
```
61+
62+
```
63+
This is also plaintext
64+
```This text is ignored
65+
66+
67+
```This is alt text
68+
This text is also preformatted.
69+
```this text is ignored.
70+
71+
```Art by Joan Stark of a camp site. A small tent faces a small campfire. There is a log nearby, perfect for sitting on. The initials "jgs" can be seen.
72+
______
73+
jgs / /\
74+
/ / \
75+
/_____/----\_ (
76+
" " ).
77+
_ ___ o (:') o
78+
(@))_)) o ~/~~\~ o
79+
o o o
80+
```
81+
82+
Syntax highlighting may be applied to preformatted blocks:
83+
```javascript
84+
column.substring(0,num)
85+
```
86+
87+
The spec says, "Any text following the leading "```" of a preformat toggle line MUST be ignored by clients." So...
88+
```
89+
```this is a comment!
90+
wow, what WAS that??

lexers/testdata/gemtext.expected

Lines changed: 132 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,132 @@
1+
[
2+
{"type":"GenericHeading","value":"# This is a heading!\n"},
3+
{"type":"Text","value":"\n"},
4+
{"type":"GenericHeading","value":"#This is also a heading!\n"},
5+
{"type":"Text","value":"\nThis is some text. It is not special.\n\n"},
6+
{"type":"GenericSubheading","value":"## Another heading\n"},
7+
{"type":"Text","value":"\n"},
8+
{"type":"GenericSubheading","value":"##yes, leading spaces are optional\n"},
9+
{"type":"Text","value":"\n"},
10+
{"type":"Keyword","value":"=\u003e"},
11+
{"type":"Text","value":" "},
12+
{"type":"NameAttribute","value":"gemini://geminiprotocol.net/docs/gemtext-specification.gmi"},
13+
{"type":"Text","value":"\n\n"},
14+
{"type":"Keyword","value":"=\u003e"},
15+
{"type":"Text","value":" "},
16+
{"type":"NameAttribute","value":"gemini://geminiprotocol.net/docs/gemtext-specification.gmi"},
17+
{"type":"Text","value":" "},
18+
{"type":"NameTag","value":"Here is a friendly link to the Gemtext specification"},
19+
{"type":"Text","value":"\n\n"},
20+
{"type":"Keyword","value":"=\u003e"},
21+
{"type":"Text","value":" "},
22+
{"type":"NameAttribute","value":"https://geminiprotocol.net/docs/gemtext-specification.gmi"},
23+
{"type":"Text","value":" "},
24+
{"type":"NameTag","value":"gemini protocol isn't special for links"},
25+
{"type":"Text","value":"\n\n"},
26+
{"type":"Keyword","value":"=\u003e"},
27+
{"type":"Text","value":" "},
28+
{"type":"NameAttribute","value":"gemini://geminiprotocol.net/"},
29+
{"type":"Text","value":"\t"},
30+
{"type":"NameTag","value":"multiple kinds of whitespace"},
31+
{"type":"Text","value":"\n\n"},
32+
{"type":"Keyword","value":"=\u003e"},
33+
{"type":"Text","value":" "},
34+
{"type":"NameAttribute","value":"gemini://geminiprotocol.net/"},
35+
{"type":"Text","value":"\t "},
36+
{"type":"NameTag","value":"multiple whitespace between parts!"},
37+
{"type":"Text","value":"\n\n"},
38+
{"type":"Keyword","value":"=\u003e"},
39+
{"type":"Text","value":" "},
40+
{"type":"NameAttribute","value":"/foo/bar/baz.txt"},
41+
{"type":"Text","value":" "},
42+
{"type":"NameTag","value":"leading slash still counts"},
43+
{"type":"Text","value":"\n\nLinks may also be closer together. Here are some examples from the spec:\n"},
44+
{"type":"Keyword","value":"=\u003e"},
45+
{"type":"Text","value":" "},
46+
{"type":"NameAttribute","value":"gemini://example.org/"},
47+
{"type":"Text","value":"\n"},
48+
{"type":"Keyword","value":"=\u003e"},
49+
{"type":"Text","value":" "},
50+
{"type":"NameAttribute","value":"gemini://example.org/"},
51+
{"type":"Text","value":" "},
52+
{"type":"NameTag","value":"An example link"},
53+
{"type":"Text","value":"\n"},
54+
{"type":"Keyword","value":"=\u003e"},
55+
{"type":"Text","value":" "},
56+
{"type":"NameAttribute","value":"gemini://example.org/foo"},
57+
{"type":"Text","value":"\t"},
58+
{"type":"NameTag","value":"Another example link at the same host"},
59+
{"type":"Text","value":"\n"},
60+
{"type":"Keyword","value":"=\u003e"},
61+
{"type":"Text","value":" "},
62+
{"type":"NameAttribute","value":"foo/bar/baz.txt"},
63+
{"type":"Text","value":"\t"},
64+
{"type":"NameTag","value":"A relative link"},
65+
{"type":"Text","value":"\n"},
66+
{"type":"Keyword","value":"=\u003e"},
67+
{"type":"Text","value":" \t"},
68+
{"type":"NameAttribute","value":"gopher://example.org:70/1"},
69+
{"type":"Text","value":" "},
70+
{"type":"NameTag","value":"A gopher link"},
71+
{"type":"Text","value":"\n\n"},
72+
{"type":"Keyword","value":"=\u003e"},
73+
{"type":"NameAttribute","value":"this"},
74+
{"type":"Text","value":" "},
75+
{"type":"NameTag","value":"shouldn't be a link, but it is. "},
76+
{"type":"Text","value":"\n"},
77+
{"type":"Keyword","value":"=\u003e"},
78+
{"type":"NameAttribute","value":"gemini://example.org/"},
79+
{"type":"Text","value":" "},
80+
{"type":"NameTag","value":"the leading space is optional"},
81+
{"type":"Text","value":"\n\n"},
82+
{"type":"GenericSubheading","value":"### Why not try lists?\n"},
83+
{"type":"Text","value":"\n\n"},
84+
{"type":"GenericSubheading","value":"###spaces are still optional\n"},
85+
{"type":"Text","value":"\n"},
86+
{"type":"Keyword","value":"* "},
87+
{"type":"Text","value":"This is a list item.\n"},
88+
{"type":"Keyword","value":"* "},
89+
{"type":"Text","value":"Here's another.\n"},
90+
{"type":"Keyword","value":"* "},
91+
{"type":"Text","value":" Still a list item!\n*This is not.\n** This is also not.\n*\tNeither is this.\n- Not a list item.\n - Not a list item.\n\n"},
92+
{"type":"Keyword","value":"\u003e"},
93+
{"type":"GenericEmph","value":" Someone said this.\n"},
94+
{"type":"Text","value":"\n"},
95+
{"type":"Keyword","value":"\u003e"},
96+
{"type":"GenericEmph","value":"Someone also said this.\n"},
97+
{"type":"Text","value":"\n"},
98+
{"type":"LiteralString","value":"```\n"},
99+
{"type":"Text","value":"This text is preformatted.\n\n# Hello, world!\n\nThis isn't to be treated as Gemtext:\n=\u003e gemini://example.com\n\n\u003e no one said this\n"},
100+
{"type":"LiteralString","value":"```"},
101+
{"type":"Text","value":"\n\n"},
102+
{"type":"LiteralString","value":"```\n"},
103+
{"type":"Text","value":"This is also plaintext\n"},
104+
{"type":"LiteralString","value":"```"},
105+
{"type":"Comment","value":"This text is ignored\n"},
106+
{"type":"Text","value":"\n\n"},
107+
{"type":"LiteralString","value":"```This is alt text\n"},
108+
{"type":"Text","value":"This text is also preformatted.\n"},
109+
{"type":"LiteralString","value":"```"},
110+
{"type":"Comment","value":"this text is ignored.\n"},
111+
{"type":"Text","value":"\n"},
112+
{"type":"LiteralString","value":"```Art by Joan Stark of a camp site. A small tent faces a small campfire. There is a log nearby, perfect for sitting on. The initials \"jgs\" can be seen.\n"},
113+
{"type":"Text","value":" ______\njgs / /\\\n / / \\\n /_____/----\\_ (\n \" \" ).\n _ ___ o (:') o\n (@))_)) o ~/~~\\~ o\n o o o\n"},
114+
{"type":"LiteralString","value":"```"},
115+
{"type":"Text","value":"\n\nSyntax highlighting may be applied to preformatted blocks:\n"},
116+
{"type":"LiteralString","value":"```javascript\n"},
117+
{"type":"Text","value":"\t"},
118+
{"type":"NameOther","value":"column"},
119+
{"type":"Punctuation","value":"."},
120+
{"type":"NameOther","value":"substring"},
121+
{"type":"Punctuation","value":"("},
122+
{"type":"LiteralNumberInteger","value":"0"},
123+
{"type":"Punctuation","value":","},
124+
{"type":"NameOther","value":"num"},
125+
{"type":"Punctuation","value":")"},
126+
{"type":"Text","value":"\n"},
127+
{"type":"LiteralString","value":"```"},
128+
{"type":"Text","value":"\n\nThe spec says, \"Any text following the leading \"```\" of a preformat toggle line MUST be ignored by clients.\" So...\n"},
129+
{"type":"LiteralString","value":"```\n```"},
130+
{"type":"Comment","value":"this is a comment!\n"},
131+
{"type":"Text","value":"wow, what WAS that??\n"}
132+
]

0 commit comments

Comments
 (0)