Skip to content

Commit 678b799

Browse files
authored
Add a lexer for the Materialize SQL dialect (#896)
1 parent c74f557 commit 678b799

File tree

2 files changed

+156
-1
lines changed

2 files changed

+156
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ translators for Pygments lexers and styles.
5050
| J | J, Java, JavaScript, JSON, Julia, Jungle |
5151
| K | Kotlin |
5252
| L | Lighttpd configuration file, LLVM, Lua |
53-
| M | Makefile, Mako, markdown, Mason, Mathematica, Matlab, mcfunction, Meson, Metal, MiniZinc, MLIR, Modula-2, MonkeyC, MorrowindScript, Myghty, MySQL |
53+
| M | Makefile, Mako, markdown, Mason, Materialize SQL dialect, Mathematica, Matlab, mcfunction, Meson, Metal, MiniZinc, MLIR, Modula-2, MonkeyC, MorrowindScript, Myghty, MySQL |
5454
| N | NASM, Natural, Newspeak, Nginx configuration file, Nim, Nix |
5555
| O | Objective-C, OCaml, Octave, Odin, OnesEnterprise, OpenEdge ABL, OpenSCAD, Org Mode |
5656
| P | PacmanConf, Perl, PHP, PHTML, Pig, PkgConfig, PL/pgSQL, plaintext, Plutus Core, Pony, PostgreSQL SQL dialect, PostScript, POVRay, PowerQuery, PowerShell, Prolog, PromQL, properties, Protocol Buffer, PRQL, PSL, Puppet, Python, Python 2 |
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
<lexer>
2+
<!-- Lexer metadata: display name, the two aliases it can be looked up by, and its MIME type. -->
<config>
3+
<name>Materialize SQL dialect</name>
4+
<alias>materialize</alias>
5+
<alias>mzsql</alias>
6+
<mime_type>text/x-materializesql</mime_type>
7+
<!-- Patterns below are written in mixed case (lower-case types, upper-case keywords) and rely on this flag. -->
<case_insensitive>true</case_insensitive>
8+
<!-- NOTE(review): presumably turns off the regex engine's multi-line mode; confirm against the lexer loader. -->
<not_multiline>true</not_multiline>
9+
</config>
10+
<rules>
11+
<state name="root">
12+
<!-- Runs of whitespace are emitted as Text. -->
<rule pattern="\s+">
13+
<token type="Text"/>
14+
</rule>
15+
<!-- Single-line comment: two hyphens, the rest of the line, and the optional trailing newline. -->
<rule pattern="--.*\n?">
16+
<token type="CommentSingle"/>
17+
</rule>
18+
<!-- Block comment opener: emit it and push the multiline-comments state. -->
<rule pattern="/\*">
19+
<token type="CommentMultiline"/>
20+
<push state="multiline-comments"/>
21+
</rule>
22+
<!--
  Built-in type names, emitted as NameBuiltin.
  Alternation is tried left to right, so a multi-word alternative must come
  before its one-word prefix: with "bit" listed ahead of "bit\s+varying" the
  short form matched first, \b succeeded before the space, and the long form
  could never match. The same applied to "character" / "character\s+varying".
-->
<rule pattern="(bigint|bigserial|bit\s+varying|bit|bool|boolean|box|bytea|char|character\s+varying|character|cidr|circle|date|decimal|double\s+precision|float4|float8|inet|int|int2|int4|int8|integer|interval|json|jsonb|line|lseg|macaddr|money|numeric|path|pg_lsn|point|polygon|real|serial|serial2|serial4|serial8|smallint|smallserial|text|time|timestamp|timestamptz|timetz|tsquery|tsvector|txid_snapshot|uuid|varbit|varchar|with\s+time\s+zone|without\s+time\s+zone|xml|anyarray|anyelement|anyenum|anynonarray|anyrange|cstring|fdw_handler|internal|language_handler|opaque|record|void)\b">
23+
<token type="NameBuiltin"/>
24+
</rule>
25+
<!--
  DO statement with a dollar-quoted body.
  Capture groups: 1 DO, 2 whitespace, 3 LANGUAGE, 4 whitespace, 5 quote,
  6 language name, 7 quote, 8 whitespace, 9 "$", 10 tag, 11 "$", 12 body,
  13 "$", 14 back-reference to the tag (group 10), 15 "$".
  Groups 3 to 8 sit inside an optional non-capturing group.
  The fifteen emitters below line up with these groups in order.
-->
<rule pattern="(?s)(DO)(\s+)(?:(LANGUAGE)?(\s+)(&#39;?)(\w+)?(&#39;?)(\s+))?(\$)([^$]*)(\$)(.*?)(\$)(\10)(\$)">
26+
<usingbygroup>
27+
<!-- Group 6 (the language name) is designated as the sublexer name. -->
<sublexer_name_group>6</sublexer_name_group>
28+
<!-- Group 12 (the text between the dollar-quote delimiters) is designated as the code. -->
<code_group>12</code_group>
29+
<emitters>
30+
<token type="Keyword"/>
31+
<token type="Text"/>
32+
<token type="Keyword"/>
33+
<token type="Text"/>
34+
<token type="LiteralStringSingle"/>
35+
<token type="LiteralStringSingle"/>
36+
<token type="LiteralStringSingle"/>
37+
<token type="Text"/>
38+
<token type="LiteralStringHeredoc"/>
39+
<token type="LiteralStringHeredoc"/>
40+
<token type="LiteralStringHeredoc"/>
41+
<token type="LiteralStringHeredoc"/>
42+
<token type="LiteralStringHeredoc"/>
43+
<token type="LiteralStringHeredoc"/>
44+
<token type="LiteralStringHeredoc"/>
45+
</emitters>
46+
</usingbygroup>
47+
</rule>
48+
<!--
  Keywords, emitted as Keyword. The pattern must stay on a single line:
  a line break inside the attribute value is turned into whitespace by the XML
  parser and would corrupt the keyword it falls in (previously RECURSIVE).
-->
<rule pattern="(ACCESS|ACKS|ADD|ADDRESSES|AGGREGATE|ALL|ALTER|AND|ANY|ARN|ARRANGEMENT|ARRAY|AS|ASC|ASSERT|AT|AUCTION|AUTHORITY|AVAILABILITY|AVRO|AWS|BEGIN|BETWEEN|BIGINT|BILLED|BODY|BOOLEAN|BOTH|BPCHAR|BROKEN|BROKER|BROKERS|BY|BYTES|CARDINALITY|CASCADE|CASE|CAST|CERTIFICATE|CHAIN|CHAR|CHARACTER|CHARACTERISTICS|CHECK|CLIENT|CLOSE|CLUSTER|CLUSTERS|COALESCE|COLLATE|COLUMN|COLUMNS|COMMENT|COMMIT|COMMITTED|COMPACTION|COMPRESSION|COMPUTE|COMPUTECTL|CONFLUENT|CONNECTION|CONNECTIONS|CONSTRAINT|COPY|COUNT|COUNTER|CREATE|CREATECLUSTER|CREATEDB|CREATEROLE|CROSS|CSV|CURRENT|CURSOR|DATABASE|DATABASES|DATUMS|DAY|DAYS|DEALLOCATE|DEBEZIUM|DEBUG|DEBUGGING|DEC|DECIMAL|DECLARE|DECORRELATED|DEFAULT|DEFAULTS|DELETE|DELIMITED|DELIMITER|DESC|DETAILS|DISCARD|DISK|DISTINCT|DOC|DOT|DOUBLE|DROP|EFFORT|ELEMENT|ELSE|ENABLE|END|ENDPOINT|ENFORCED|ENVELOPE|ERROR|ESCAPE|EXCEPT|EXECUTE|EXISTS|EXPECTED|EXPLAIN|EXPOSE|EXTRACT|FACTOR|FALSE|FETCH|FIELDS|FILTER|FIRST|FLOAT|FOLLOWING|FOR|FOREIGN|FORMAT|FORWARD|FROM|FULL|FULLNAME|FUNCTION|GENERATOR|GRANT|GREATEST|GROUP|GROUPS|HAVING|HEADER|HEADERS|HOLD|HOST|HOUR|HOURS|ID|IDEMPOTENCE|IDLE|IF|IGNORE|ILIKE|IN|INCLUDE|INDEX|INDEXES|INFO|INHERIT|INLINE|INNER|INPUT|INSERT|INSPECT|INT|INTEGER|INTERNAL|INTERSECT|INTERVAL|INTO|INTROSPECTION|IS|ISNULL|ISOLATION|JOIN|JSON|KAFKA|KEY|KEYS|LAST|LATERAL|LATEST|LEADING|LEAST|LEFT|LEVEL|LIKE|LIMIT|LIST|LOAD|LOCAL|LOG|LOGICAL|LOGIN|MANAGED|MAP|MARKETING|MATERIALIZE|MATERIALIZED|MAX|MECHANISMS|MEMBERSHIP|MERGE|MESSAGE|METADATA|MINUTE|MINUTES|MODE|MONTH|MONTHS|MS|MUTUALLY|NAME|NAMES|NATURAL|NEXT|NO|NOCREATECLUSTER|NOCREATEDB|NOCREATEROLE|NOINHERIT|NOLOGIN|NONE|NOSUPERUSER|NOT|NOTICE|NULL|NULLIF|NULLS|OBJECTS|OF|OFFSET|ON|ONLY|OPERATOR|OPTIMIZED|OPTIMIZER|OPTIONS|OR|ORDER|ORDINALITY|OUTER|OVER|OWNED|OWNER|PARTITION|PASSWORD|PHYSICAL|PLAN|PLANS|PORT|POSITION|POSTGRES|PRECEDING|PRECISION|PREFIX|PREPARE|PRIMARY|PRIVATELINK|PRIVILEGES|PROGRESS|PROTOBUF|PROTOCOL|PUBLICATION|QUERY|QUOTE|RAISE|RANGE|RAW|READ|REAL|REASSIGN|RECURSION|RECURSIVE|REFERENCES|REFRESH|REGEX|REGION|REGISTRY|RENAME|REPEATABLE|REPLACE|REPLICA|REPLICAS|REPLICATION|RESET|RESPECT|RESTRICT|RETENTION|RETURN|RETURNING|REVOKE|RIGHT|ROLE|ROLES|ROLLBACK|ROTATE|ROW|ROWS|SASL|SCALE|SCHEMA|SCHEMAS|SCRIPT|SECOND|SECONDS|SECRET|SECRETS|SECURITY|SEED|SELECT|SEQUENCES|SERIALIZABLE|SERVICE|SESSION|SET|SHARD|SHOW|SINK|SINKS|SIZE|SMALLINT|SNAPSHOT|SOME|SOURCE|SOURCES|SSH|SSL|START|STDIN|STDOUT|STORAGE|STORAGECTL|STRATEGY|STRICT|STRING|SUBSCRIBE|SUBSOURCE|SUBSOURCES|SUBSTRING|SUPERUSER|SWAP|SYSTEM|TABLE|TABLES|TAIL|TEMP|TEMPORARY|TEST|TEXT|THEN|TICK|TIES|TIME|TIMELINE|TIMEOUT|TIMESTAMP|TIMESTAMPTZ|TO|TOKEN|TOPIC|TPCH|TRACE|TRAILING|TRANSACTION|TRIM|TRUE|TUNNEL|TYPE|TYPES|UNBOUNDED|UNCOMMITTED|UNION|UNIQUE|UNKNOWN|UP|UPDATE|UPSERT|URL|USAGE|USER|USERNAME|USERS|USING|VALIDATE|VALUE|VALUES|VARCHAR|VARYING|VIEW|VIEWS|WARNING|WEBHOOK|WHEN|WHERE|WINDOW|WIRE|WITH|WITHIN|WITHOUT|WORK|WORKERS|WRITE|YEAR|YEARS|ZONE|ZONES)\b">
49+
<token type="Keyword"/>
50+
</rule>
51+
<!-- One or more characters from the operator set, emitted as a single Operator token. -->
<rule pattern="[+*/&lt;&gt;=~!@#%^&amp;|`?-]+">
52+
<token type="Operator"/>
53+
</rule>
54+
<!-- A double colon is also an Operator; the colon is not in the character class above, so it needs its own rule. -->
<rule pattern="::">
55+
<token type="Operator"/>
56+
</rule>
57+
<!-- A dollar sign followed by digits (for example $1) is emitted as NameVariable. -->
<rule pattern="\$\d+">
58+
<token type="NameVariable"/>
59+
</rule>
60+
<!--
  Floating-point literals: digits with a decimal point ("1.", "1.5", ".5"),
  optionally followed by an exponent, or plain digits followed by an exponent
  ("1e5"). The previous pattern also matched plain digit runs and a lone ".",
  which made integers come out as floats and turned a qualifying dot (as in
  schema.table) into a float instead of punctuation.
-->
<rule pattern="([0-9]+\.[0-9]*|\.[0-9]+)(e[+-]?[0-9]+)?|[0-9]+e[+-]?[0-9]+">
61+
<token type="LiteralNumberFloat"/>
62+
</rule>
63+
<!-- A run of decimal digits is emitted as LiteralNumberInteger. -->
<rule pattern="[0-9]+">
64+
<token type="LiteralNumberInteger"/>
65+
</rule>
66+
<!--
  Opening single quote, optionally prefixed by E or U&amp;.
  Group 1 (the prefix, possibly empty) is emitted as LiteralStringAffix and
  group 2 (the quote) as LiteralStringSingle; the string state is then pushed.
-->
<rule pattern="((?:E|U&amp;)?)(&#39;)">
67+
<bygroups>
68+
<token type="LiteralStringAffix"/>
69+
<token type="LiteralStringSingle"/>
70+
</bygroups>
71+
<push state="string"/>
72+
</rule>
73+
<!--
  Opening double quote, optionally prefixed by U&amp;.
  Group 1 is emitted as LiteralStringAffix and group 2 as LiteralStringName;
  the quoted-ident state is then pushed.
-->
<rule pattern="((?:U&amp;)?)(&#34;)">
74+
<bygroups>
75+
<token type="LiteralStringAffix"/>
76+
<token type="LiteralStringName"/>
77+
</bygroups>
78+
<push state="quoted-ident"/>
79+
</rule>
80+
<!--
  Dollar-quoted body followed by a language clause.
  Capture groups: 1 "$", 2 tag, 3 "$", 4 body, 5 "$", 6 back-reference to the
  tag (group 2), 7 "$", 8 whitespace, 9 LANGUAGE, 10 whitespace, 11 quote,
  12 language name, 13 quote. The thirteen emitters below line up with these
  groups in order.
  Both whitespace groups are mandatory while LANGUAGE, the quotes and the name
  are optional.
-->
<rule pattern="(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)(\s+)(LANGUAGE)?(\s+)(&#39;?)(\w+)?(&#39;?)">
81+
<usingbygroup>
82+
<!-- Group 12 (the language name) is designated as the sublexer name. -->
<sublexer_name_group>12</sublexer_name_group>
83+
<!-- Group 4 (the text between the dollar-quote delimiters) is designated as the code. -->
<code_group>4</code_group>
84+
<emitters>
85+
<token type="LiteralStringHeredoc"/>
86+
<token type="LiteralStringHeredoc"/>
87+
<token type="LiteralStringHeredoc"/>
88+
<token type="LiteralStringHeredoc"/>
89+
<token type="LiteralStringHeredoc"/>
90+
<token type="LiteralStringHeredoc"/>
91+
<token type="LiteralStringHeredoc"/>
92+
<token type="Text"/>
93+
<token type="Keyword"/>
94+
<token type="Text"/>
95+
<token type="LiteralStringSingle"/>
96+
<token type="LiteralStringSingle"/>
97+
<token type="LiteralStringSingle"/>
98+
</emitters>
99+
</usingbygroup>
100+
</rule>
101+
<!-- Dollar-quoted string with no language clause: the whole match, delimiters included, is one LiteralStringHeredoc token. -->
<rule pattern="(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)">
102+
<token type="LiteralStringHeredoc"/>
103+
</rule>
104+
<!-- Identifier: a letter or underscore followed by word characters, emitted as Name. -->
<rule pattern="[a-z_]\w*">
105+
<token type="Name"/>
106+
</rule>
107+
<!--
  A colon followed by an identifier, optionally wrapped in matching single or
  double quotes (the \1 back-reference requires the closing quote to equal the
  opening one), emitted as NameVariable.
  NOTE(review): looks like psql-style variable interpolation; confirm.
-->
<rule pattern=":([&#39;&#34;]?)[a-z]\w*\b\1">
108+
<token type="NameVariable"/>
109+
</rule>
110+
<!-- Single punctuation characters: semicolon, colon, parentheses, square brackets, braces, comma and dot. -->
<rule pattern="[;:()\[\]{},.]">
111+
<token type="Punctuation"/>
112+
</rule>
113+
</state>
114+
<!--
  State entered after a block comment opener. Every rule emits CommentMultiline.
  An opener inside the comment pushes this state again and a closer pops one
  level, so nested block comments are tracked by stack depth.
-->
<state name="multiline-comments">
115+
<!-- Nested opener: go one level deeper. -->
<rule pattern="/\*">
116+
<token type="CommentMultiline"/>
117+
<push state="multiline-comments"/>
118+
</rule>
119+
<!-- Closer: leave the current level. -->
<rule pattern="\*/">
120+
<token type="CommentMultiline"/>
121+
<pop depth="1"/>
122+
</rule>
123+
<!-- Comment text containing neither a slash nor an asterisk. -->
<rule pattern="[^/*]+">
124+
<token type="CommentMultiline"/>
125+
</rule>
126+
<!-- A lone slash or asterisk that is not part of an opener or closer. -->
<rule pattern="[/*]">
127+
<token type="CommentMultiline"/>
128+
</rule>
129+
</state>
130+
<!--
  State entered after an opening single quote. Every rule emits
  LiteralStringSingle. There is no rule for backslash escapes in this state.
-->
<state name="string">
131+
<!-- String content up to the next single quote. -->
<rule pattern="[^&#39;]+">
132+
<token type="LiteralStringSingle"/>
133+
</rule>
134+
<!-- A doubled single quote stays inside the string; it must be tried before the closing-quote rule. -->
<rule pattern="&#39;&#39;">
135+
<token type="LiteralStringSingle"/>
136+
</rule>
137+
<!-- A single quote on its own closes the string and pops this state. -->
<rule pattern="&#39;">
138+
<token type="LiteralStringSingle"/>
139+
<pop depth="1"/>
140+
</rule>
141+
</state>
142+
<!-- State entered after an opening double quote. Every rule emits LiteralStringName. -->
<state name="quoted-ident">
143+
<!-- Identifier content up to the next double quote. -->
<rule pattern="[^&#34;]+">
144+
<token type="LiteralStringName"/>
145+
</rule>
146+
<!-- A doubled double quote stays inside the identifier; it must be tried before the closing-quote rule. -->
<rule pattern="&#34;&#34;">
147+
<token type="LiteralStringName"/>
148+
</rule>
149+
<!-- A double quote on its own closes the identifier and pops this state. -->
<rule pattern="&#34;">
150+
<token type="LiteralStringName"/>
151+
<pop depth="1"/>
152+
</rule>
153+
</state>
154+
</rules>
155+
</lexer>

0 commit comments

Comments
 (0)