Skip to content

Commit 15ed6a4

Browse files
authored
enh(r) Add operators and punctuation (#3195)
This change adds highlighting for operators and punctuation, and fixes the issues described in #3194.
* Give R a relevance boost from arrow-assign
* Make `<-` less of a signal boost for R
* Rebalance relevance of common syntactic constructs
* Fix Vala having too much relevance for `^#` (meta/comment)
1 parent 75fd067 commit 15ed6a4

File tree

9 files changed

+157
-86
lines changed

9 files changed

+157
-86
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ Language Grammars:
4646

4747
Parser:
4848

49+
- enh(vala) improve language detection for Vala (#3195) [Konrad Rudolph][]
50+
- enh(r) add support for operators, fix number highlighting bug (#3194, #3195) [Konrad Rudolph][]
4951
- enh(parser) add `beginScope` and `endScope` to allow separate scoping begin and end (#3159) [Josh Goebel][]
5052
- enh(parser) `endScope` now supports multi-class matchers as well (#3159) [Josh Goebel][]
5153
- enh(parser) `highlightElement` now always tags blocks with a consistent `language-[name]` class [Josh Goebel][]

src/languages/r.js

Lines changed: 84 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,27 @@ export default function(hljs) {
1818
// handled in a separate mode. See `test/markup/r/names.txt` for examples.
1919
// FIXME: Support Unicode identifiers.
2020
const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
21-
const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/;
21+
const NUMBER_TYPES_RE = regex.either(
22+
// Special case: only hexadecimal binary powers can contain fractions
23+
/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
24+
// Hexadecimal numbers without fraction and optional binary power
25+
/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
26+
// Decimal numbers
27+
/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
28+
);
29+
// R operators. Multi-character operators are listed before the
// single-character class so that alternation picks the full operator; in
// particular `**` must precede the class (which contains `*`), otherwise a
// standalone `**` would only ever match as a single `*`.
const OPERATORS_RE = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|\*\*|[-+*\/?!$&|:<=>@^~]/;
30+
const PUNCTUATION_RE = regex.either(
31+
/[()]/,
32+
/[{}]/,
33+
/\[\[/,
34+
/[[\]]/,
35+
/\\/,
36+
/,/
37+
);
2238

2339
return {
2440
name: 'R',
2541

26-
// only in Haskell, not R
27-
illegal: /->/,
2842
keywords: {
2943
$pattern: IDENT_RE,
3044
keyword:
@@ -56,6 +70,7 @@ export default function(hljs) {
5670
'standardGeneric substitute sum switch tan tanh tanpi tracemem ' +
5771
'trigamma trunc unclass untracemem UseMethod xtfrm',
5872
},
73+
5974
contains: [
6075
// Roxygen comments
6176
hljs.COMMENT(
@@ -69,7 +84,7 @@ export default function(hljs) {
6984
// preventing highlighting. This code is example R code, so nested
7085
// doctags shouldn’t be treated as such. See
7186
// `test/markup/r/roxygen.txt` for an example.
72-
className: 'doctag',
87+
scope: 'doctag',
7388
begin: '@examples',
7489
starts: {
7590
contains: [
@@ -89,12 +104,12 @@ export default function(hljs) {
89104
{
90105
// Handle `@param` to highlight the parameter name following
91106
// after.
92-
className: 'doctag',
107+
scope: 'doctag',
93108
begin: '@param',
94109
end: /$/,
95110
contains: [
96111
{
97-
className: 'variable',
112+
scope: 'variable',
98113
variants: [
99114
{ begin: IDENT_RE },
100115
{ begin: /`(?:\\.|[^`\\])+`/ }
@@ -104,11 +119,11 @@ export default function(hljs) {
104119
]
105120
},
106121
{
107-
className: 'doctag',
122+
scope: 'doctag',
108123
begin: /@[a-zA-Z]+/
109124
},
110125
{
111-
className: 'keyword',
126+
scope: 'keyword',
112127
begin: /\\[a-zA-Z]+/,
113128
}
114129
]
@@ -118,7 +133,7 @@ export default function(hljs) {
118133
hljs.HASH_COMMENT_MODE,
119134

120135
{
121-
className: 'string',
136+
scope: 'string',
122137
contains: [hljs.BACKSLASH_ESCAPE],
123138
variants: [
124139
hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
@@ -131,48 +146,88 @@ export default function(hljs) {
131146
{begin: "'", end: "'", relevance: 0}
132147
],
133148
},
149+
150+
// Matching numbers immediately following punctuation and operators is
151+
// tricky since we need to look at the character preceding a number to
152+
// ensure the number is not part of an identifier, and we cannot use
153+
// negative look-behind assertions. So instead we explicitly handle all
154+
// possible combinations of (operator|punctuation), number.
155+
// TODO: replace with negative look-behind when available
156+
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
157+
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
158+
// { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
134159
{
135160
relevance: 0,
136-
className: {
137-
2: "number"
138-
},
139161
variants: [
140-
// TODO: replace with negative look-behind when available
141-
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
142-
// { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
143-
// { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
144162
{
145-
// Special case: only hexadecimal binary powers can contain fractions.
163+
scope: {
164+
1: 'operator',
165+
2: 'number'
166+
},
167+
match: [
168+
OPERATORS_RE,
169+
NUMBER_TYPES_RE
170+
]
171+
},
172+
{
173+
scope: {
174+
1: 'operator',
175+
2: 'number'
176+
},
146177
match: [
147-
/[^a-zA-Z0-9._]/, // not part of an identifier
148-
/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/
178+
/%[^%]*%/,
179+
NUMBER_TYPES_RE
149180
]
150181
},
151182
{
183+
scope: {
184+
1: 'punctuation',
185+
2: 'number'
186+
},
152187
match: [
153-
/[^a-zA-Z0-9._]/, // not part of an identifier
154-
/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/
188+
PUNCTUATION_RE,
189+
NUMBER_TYPES_RE
155190
]
156191
},
157192
{
193+
scope: { 2: 'number' },
158194
match: [
159-
/[^a-zA-Z0-9._]/, // not part of an identifier
160-
/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
195+
/[^a-zA-Z0-9._]|^/, // not part of an identifier, or start of document
196+
NUMBER_TYPES_RE
161197
]
162198
}
163199
]
164200
},
201+
202+
// Operators/punctuation when they're not directly followed by numbers
203+
{
204+
// Relevance boost for the most common assignment form.
205+
scope: { 3: 'operator' },
206+
match: [
207+
IDENT_RE,
208+
/\s+/,
209+
/<-/,
210+
/\s+/
211+
]
212+
},
213+
165214
{
166-
// infix operator
167-
begin: '%',
168-
end: '%'
215+
scope: 'operator',
216+
relevance: 0,
217+
variants: [
218+
{ match: OPERATORS_RE },
219+
{ match: /%[^%]*%/ }
220+
]
169221
},
170-
// relevance boost for assignment
222+
171223
{
172-
begin: regex.concat(SIMPLE_IDENT, "\\s+<-\\s+")
224+
scope: 'punctuation',
225+
relevance: 0,
226+
match: PUNCTUATION_RE
173227
},
228+
174229
{
175-
// escaped identifier
230+
// Escaped identifier
176231
begin: '`',
177232
end: '`',
178233
contains: [

src/languages/vala.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ export default function(hljs) {
5252
className: 'meta',
5353
begin: '^#',
5454
end: '$',
55-
relevance: 2
5655
}
5756
]
5857
};

test/markup/r/names.expect.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
11
<span class="hljs-comment"># Valid names</span>
22

3-
a1_foo, A1_FOO, .foo_, ._foo, Bar.42, foo..1, ., ._, .., ..., ..1, <span class="hljs-built_in">c</span>, <span class="hljs-built_in">T</span>, <span class="hljs-built_in">F</span>, ._1
3+
a1_foo<span class="hljs-punctuation">,</span> A1_FOO<span class="hljs-punctuation">,</span> .foo_<span class="hljs-punctuation">,</span> ._foo<span class="hljs-punctuation">,</span> Bar.42<span class="hljs-punctuation">,</span> foo..1<span class="hljs-punctuation">,</span> .<span class="hljs-punctuation">,</span> ._<span class="hljs-punctuation">,</span> ..<span class="hljs-punctuation">,</span> ...<span class="hljs-punctuation">,</span> ..1<span class="hljs-punctuation">,</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">F</span><span class="hljs-punctuation">,</span> ._1
44

55
<span class="hljs-comment"># Reserved Words</span>
66

7-
<span class="hljs-literal">NA</span>, <span class="hljs-literal">NA_integer_</span>, <span class="hljs-literal">NA_real_</span>, <span class="hljs-literal">NA_character_</span>, <span class="hljs-literal">NA_complex_</span>, <span class="hljs-literal">NULL</span>, <span class="hljs-literal">NaN</span>, <span class="hljs-literal">Inf</span>
7+
<span class="hljs-literal">NA</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_integer_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_real_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_character_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NA_complex_</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NULL</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">NaN</span><span class="hljs-punctuation">,</span> <span class="hljs-literal">Inf</span>
88

99
<span class="hljs-comment"># Keywords</span>
1010

11-
<span class="hljs-keyword">function</span>, <span class="hljs-keyword">while</span>, <span class="hljs-keyword">repeat</span>, <span class="hljs-keyword">for</span>, <span class="hljs-keyword">if</span>, <span class="hljs-keyword">in</span>, <span class="hljs-keyword">else</span>, <span class="hljs-keyword">next</span>, <span class="hljs-keyword">break</span>
11+
<span class="hljs-keyword">function</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">while</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">repeat</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">for</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">if</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">in</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">else</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">next</span><span class="hljs-punctuation">,</span> <span class="hljs-keyword">break</span>
1212

1313
<span class="hljs-comment"># Not reserved</span>
1414

15-
NULLa, NULL1, NULL., `NULL`, <span class="hljs-string">&#x27;NULL&#x27;</span>, NA_foo_, na_real_, Function, for.
15+
NULLa<span class="hljs-punctuation">,</span> NULL1<span class="hljs-punctuation">,</span> NULL.<span class="hljs-punctuation">,</span> `NULL`<span class="hljs-punctuation">,</span> <span class="hljs-string">&#x27;NULL&#x27;</span><span class="hljs-punctuation">,</span> NA_foo_<span class="hljs-punctuation">,</span> na_real_<span class="hljs-punctuation">,</span> Function<span class="hljs-punctuation">,</span> for.
1616

1717
<span class="hljs-comment"># Primitive built-ins</span>
1818

19-
<span class="hljs-built_in">return</span>, <span class="hljs-built_in">switch</span>, <span class="hljs-built_in">sum</span>
19+
<span class="hljs-built_in">return</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">switch</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">sum</span>
2020

2121
<span class="hljs-comment"># Non-primitive base functions</span>
2222

23-
stop, try
23+
stop<span class="hljs-punctuation">,</span> try
2424

2525
<span class="hljs-comment"># Quoted identifiers</span>
2626

test/markup/r/numbers.expect.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
1+
<span class="hljs-number">1</span> <span class="hljs-comment"># Regression caused numbers at beginning not to be highlighted.</span>
2+
13
<span class="hljs-comment"># Numbers</span>
24

3-
<span class="hljs-number">0</span>, <span class="hljs-number">01</span>, <span class="hljs-number">08</span>, <span class="hljs-number">123456</span>, <span class="hljs-number">1256.701</span>, <span class="hljs-number">123e3</span>, <span class="hljs-number">123E+3</span>, <span class="hljs-number">1.23e-3</span>, <span class="hljs-number">1.23E3</span>, <span class="hljs-number">.25</span>, <span class="hljs-number">2.</span>
5+
<span class="hljs-number">0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">01</span><span class="hljs-punctuation">,</span> <span class="hljs-number">08</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123456</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1256.701</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123e3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">123E+3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.23e-3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.23E3</span><span class="hljs-punctuation">,</span> <span class="hljs-number">.25</span><span class="hljs-punctuation">,</span> <span class="hljs-number">2.</span>
46

57
<span class="hljs-comment"># Integers</span>
68

7-
<span class="hljs-number">123L</span>, -<span class="hljs-number">50L</span>
9+
<span class="hljs-number">123L</span><span class="hljs-punctuation">,</span> <span class="hljs-operator">-</span><span class="hljs-number">50L</span>
810

911
<span class="hljs-comment"># Imaginary numbers</span>
1012

11-
<span class="hljs-number">123i</span>, -<span class="hljs-number">123i</span>, <span class="hljs-number">1.2e-3i</span>, <span class="hljs-number">1.i</span>, <span class="hljs-number">.0i</span>
13+
<span class="hljs-number">123i</span><span class="hljs-punctuation">,</span> <span class="hljs-operator">-</span><span class="hljs-number">123i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.2e-3i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1.i</span><span class="hljs-punctuation">,</span> <span class="hljs-number">.0i</span>
1214

1315
<span class="hljs-comment"># Hex numbers</span>
1416

15-
<span class="hljs-number">0x0</span>, <span class="hljs-number">0xabcdefABCDEF01234</span>, <span class="hljs-number">0xabcp123</span>, <span class="hljs-number">0xabcP-123</span>, <span class="hljs-number">0x1.2p2</span>, <span class="hljs-number">0xa.bp-3i</span>
17+
<span class="hljs-number">0x0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcdefABCDEF01234</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcp123</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xabcP-123</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0x1.2p2</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0xa.bp-3i</span>
1618

1719
<span class="hljs-comment"># Invalid/not literals (for reference)</span>
1820

test/markup/r/numbers.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
1 # Regression caused numbers at beginning not to be highlighted.
2+
13
# Numbers
24

35
0, 01, 08, 123456, 1256.701, 123e3, 123E+3, 1.23e-3, 1.23E3, .25, 2.

0 commit comments

Comments
 (0)