@@ -9,6 +9,7 @@ local byte, char, find, gsub, match, sub =
9
9
10
10
local _ENV = nil
11
11
12
+
12
13
local function newdecoder ()
13
14
local json , pos , nullv , arraylen
14
15
@@ -62,8 +63,9 @@ local function newdecoder()
62
63
63
64
--[[
64
65
Numbers
65
- Conceptually, the longest prefix that matches to `-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]*)?`
66
- (in regexp) is captured as a number and its conformance to the JSON spec is checked.
66
+ Conceptually, the longest prefix that matches to
67
+ `-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]*)?` (in regexp) is
68
+ captured as a number and its conformance to the JSON spec is checked.
67
69
--]]
68
70
-- deal with non-standard locales
69
71
local radixmark = match (tostring (0.5 ), ' [^0-9]' )
@@ -91,8 +93,8 @@ local function newdecoder()
91
93
return error_number ()
92
94
end
93
95
94
- if c == 0x2E then -- is this `.`?
95
- num = match (json , ' ^.[0-9]*' , pos ) -- skipping 0
96
+ if c == 0x2E then -- is this `.`?
97
+ num = match (json , ' ^.[0-9]*' , pos ) -- skipping 0
96
98
c = # num
97
99
if c == 1 then
98
100
return error_number ()
@@ -101,14 +103,14 @@ local function newdecoder()
101
103
c = byte (json , postmp )
102
104
end
103
105
104
- if c == 0x45 or c == 0x65 then -- is this e or E?
106
+ if c == 0x45 or c == 0x65 then -- is this e or E?
105
107
c = match (json , ' ^[^eE]*[eE][-+]?[0-9]+' , pos )
106
108
if not c then
107
109
return error_number ()
108
110
end
109
111
if num then
110
112
num = c
111
- else -- `0e.*` is always 0.0
113
+ else -- `0e.*` is always 0.0
112
114
numret = 0.0
113
115
end
114
116
postmp = pos + # c
@@ -128,13 +130,13 @@ local function newdecoder()
128
130
local function f_num (mns )
129
131
pos = pos - 1
130
132
local num = match (json , ' ^.[0-9]*%.?[0-9]*' , pos )
131
- if byte (num , - 1 ) == 0x2E then
133
+ if byte (num , - 1 ) == 0x2E then -- `.`?
132
134
return error_number ()
133
135
end
134
136
local postmp = pos + # num
135
137
local c = byte (json , postmp )
136
138
137
- if c == 0x45 or c == 0x65 then -- e or E?
139
+ if c == 0x45 or c == 0x65 then -- e or E?
138
140
num = match (json , ' ^[^eE]*[eE][-+]?[0-9]+' , pos )
139
141
if not num then
140
142
return error_number ()
@@ -177,10 +179,13 @@ local function newdecoder()
177
179
Strings
178
180
--]]
179
181
local f_str_hextbl = {
180
- 0x0 , 0x1 , 0x2 , 0x3 , 0x4 , 0x5 , 0x6 , 0x7 , 0x8 , 0x9 , inf , inf , inf , inf , inf , inf ,
181
- inf , 0xA , 0xB , 0xC , 0xD , 0xE , 0xF , inf , inf , inf , inf , inf , inf , inf , inf , inf ,
182
- inf , inf , inf , inf , inf , inf , inf , inf , inf , inf , inf , inf , inf , inf , inf , inf ,
183
- inf , 0xA , 0xB , 0xC , 0xD , 0xE , 0xF , inf , inf , inf , inf , inf , inf , inf , inf , inf ,
182
+ 0x0 , 0x1 , 0x2 , 0x3 , 0x4 , 0x5 , 0x6 , 0x7 ,
183
+ 0x8 , 0x9 , inf , inf , inf , inf , inf , inf ,
184
+ inf , 0xA , 0xB , 0xC , 0xD , 0xE , 0xF , inf ,
185
+ inf , inf , inf , inf , inf , inf , inf , inf ,
186
+ inf , inf , inf , inf , inf , inf , inf , inf ,
187
+ inf , inf , inf , inf , inf , inf , inf , inf ,
188
+ inf , 0xA , 0xB , 0xC , 0xD , 0xE , 0xF ,
184
189
}
185
190
f_str_hextbl .__index = function ()
186
191
return inf
@@ -211,12 +216,12 @@ local function newdecoder()
211
216
f_str_hextbl [c3 - 47 ] * 0x10 +
212
217
f_str_hextbl [c4 - 47 ]
213
218
if ucode ~= inf then
214
- if ucode < 0x80 then -- 1byte
219
+ if ucode < 0x80 then -- 1byte
215
220
if rest then
216
221
return char (ucode , rest )
217
222
end
218
223
return char (ucode )
219
- elseif ucode < 0x800 then -- 2byte
224
+ elseif ucode < 0x800 then -- 2bytes
220
225
c1 = floor (ucode / 0x40 )
221
226
c2 = ucode - c1 * 0x40
222
227
c1 = c1 + 0xC0
@@ -225,7 +230,7 @@ local function newdecoder()
225
230
return char (c1 , c2 , rest )
226
231
end
227
232
return char (c1 , c2 )
228
- elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3byte
233
+ elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
229
234
c1 = floor (ucode / 0x1000 )
230
235
ucode = ucode - c1 * 0x1000
231
236
c2 = floor (ucode / 0x40 )
@@ -237,7 +242,7 @@ local function newdecoder()
237
242
return char (c1 , c2 , c3 , rest )
238
243
end
239
244
return char (c1 , c2 , c3 )
240
- elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
245
+ elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
241
246
if f_str_surrogate_prev == 0 then
242
247
f_str_surrogate_prev = ucode
243
248
if not rest then
@@ -247,7 +252,7 @@ local function newdecoder()
247
252
end
248
253
f_str_surrogate_prev = 0
249
254
decodeerror (" two contiguous 1st surrogate pair bytes" )
250
- else -- surrogate pair 2nd
255
+ else -- surrogate pair 2nd
251
256
if f_str_surrogate_prev ~= 0 then
252
257
ucode = 0x10000 +
253
258
(f_str_surrogate_prev - 0xD800 ) * 0x400 +
@@ -288,38 +293,43 @@ local function newdecoder()
288
293
local pos2 = pos
289
294
local c1 , c2
290
295
repeat
291
- newpos = find (json , ' "' , pos2 , true ) -- search '"'
296
+ newpos = find (json , ' "' , pos2 , true ) -- search '"'
292
297
if not newpos then
293
298
decodeerror (" unterminated string" )
294
299
end
295
300
pos2 = newpos + 1
296
- while true do -- skip preceding '\\'s
301
+ while true do -- skip preceding '\\'s
297
302
c1 , c2 = byte (json , newpos - 2 , newpos - 1 )
298
303
if c2 ~= 0x5C or c1 ~= 0x5C then
299
304
break
300
305
end
301
306
newpos = newpos - 2
302
307
end
303
- until c2 ~= 0x5C -- check '"' is not preceded by '\'
308
+ until c2 ~= 0x5C -- leave if '"' is not preceded by '\'
304
309
305
310
local str = sub (json , pos , pos2 - 2 )
306
311
pos = pos2
307
312
308
- if iskey then -- check key cache
313
+ if iskey then -- check key cache
309
314
local str2 = f_str_keycache [str ]
310
315
if str2 then
311
316
return str2
312
317
end
313
318
end
314
319
local str2 = str
315
- if find (str2 , ' \\ ' , 1 , true ) then -- check if backslash occurs
316
- str2 = gsub (str2 , ' \\ (.)([^\\ ]?[^\\ ]?[^\\ ]?[^\\ ]?[^\\ ]?)' , f_str_subst ) -- interpret escapes
320
+ if find (str2 , ' \\ ' , 1 , true ) then -- check whether a backslash exists
321
+ -- We need to grab 4 characters after the escape char,
322
+ -- for encoding a Unicode code point as UTF-8.
323
+ -- As we need to ensure that every first surrogate pair byte is
324
+ -- immediately followed by the second one, we grab up to 5 characters and
325
+ -- check the last for this purpose.
326
+ str2 = gsub (str2 , ' \\ (.)([^\\ ]?[^\\ ]?[^\\ ]?[^\\ ]?[^\\ ]?)' , f_str_subst )
317
327
if f_str_surrogate_prev ~= 0 then
318
328
f_str_surrogate_prev = 0
319
329
decodeerror (" 1st surrogate pair byte not continued by 2nd" )
320
330
end
321
331
end
322
- if iskey then -- commit key cache
332
+ if iskey then -- commit key cache
323
333
f_str_keycache [str ] = str2
324
334
end
325
335
return str2
@@ -336,17 +346,17 @@ local function newdecoder()
336
346
pos = pos + 1
337
347
338
348
local i = 0
339
- if byte (json , pos ) ~= 0x5D then -- check closing bracket ']', that consists an empty array
349
+ if byte (json , pos ) ~= 0x5D then -- check closing bracket ']' which means the array empty
340
350
local newpos = pos - 1
341
351
repeat
342
352
i = i + 1
343
- f = dispatcher [byte (json ,newpos + 1 )] -- parse value
353
+ f = dispatcher [byte (json ,newpos + 1 )] -- parse value
344
354
pos = newpos + 2
345
355
ary [i ] = f ()
346
- f , newpos = find (json , ' ^[ \n\r\t ]*,[ \n\r\t ]*' , pos ) -- check comma
356
+ f , newpos = find (json , ' ^[ \n\r\t ]*,[ \n\r\t ]*' , pos ) -- check comma
347
357
until not newpos
348
358
349
- f , newpos = find (json , ' ^[ \n\r\t ]*%]' , pos ) -- check closing bracket
359
+ f , newpos = find (json , ' ^[ \n\r\t ]*%]' , pos ) -- check closing bracket
350
360
if not newpos then
351
361
decodeerror (" no closing bracket of an array" )
352
362
end
@@ -366,16 +376,16 @@ local function newdecoder()
366
376
367
377
f , pos = find (json , ' ^[ \n\r\t ]*' , pos )
368
378
pos = pos + 1
369
- if byte (json , pos ) ~= 0x7D then -- check the closing bracket '}', that consists an empty object
379
+ if byte (json , pos ) ~= 0x7D then -- check closing bracket '}' which means the object empty
370
380
local newpos = pos - 1
371
381
372
382
repeat
373
383
pos = newpos + 1
374
- if byte (json , pos ) ~= 0x22 then -- check '"'
384
+ if byte (json , pos ) ~= 0x22 then -- check '"'
375
385
decodeerror (" not key" )
376
386
end
377
387
pos = pos + 1
378
- local key = f_str (true ) -- parse key
388
+ local key = f_str (true ) -- parse key
379
389
380
390
-- optimized for compact json
381
391
-- c1, c2 == ':', <the first char of the value> or
@@ -392,13 +402,13 @@ local function newdecoder()
392
402
f = dispatcher [c2 ]
393
403
end
394
404
end
395
- if f == f_err then -- read a colon and arbitrary number of spaces
405
+ if f == f_err then -- read a colon and arbitrary number of spaces
396
406
f , newpos = find (json , ' ^[ \n\r\t ]*:[ \n\r\t ]*' , pos )
397
407
if not newpos then
398
408
decodeerror (" no colon after a key" )
399
409
end
400
410
end
401
- f = dispatcher [byte (json , newpos + 1 )] -- parse value
411
+ f = dispatcher [byte (json , newpos + 1 )] -- parse value
402
412
pos = newpos + 2
403
413
obj [key ] = f ()
404
414
f , newpos = find (json , ' ^[ \n\r\t ]*,[ \n\r\t ]*' , pos )
@@ -416,18 +426,27 @@ local function newdecoder()
416
426
end
417
427
418
428
--[[
419
- The jump table to dispatch a parser for a value, indexed by the code of the value's first char.
429
+ The jump table to dispatch a parser for a value,
430
+ indexed by the code of the value's first char.
420
431
Nil key means the end of json.
421
432
--]]
422
433
dispatcher = {
423
- f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
424
- f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
425
- f_err , f_err , f_str , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_mns , f_err , f_err ,
426
- f_zro , f_num , f_num , f_num , f_num , f_num , f_num , f_num , f_num , f_num , f_err , f_err , f_err , f_err , f_err , f_err ,
427
- f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
428
- f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_ary , f_err , f_err , f_err , f_err ,
429
- f_err , f_err , f_err , f_err , f_err , f_err , f_fls , f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_nul , f_err ,
430
- f_err , f_err , f_err , f_err , f_tru , f_err , f_err , f_err , f_err , f_err , f_err , f_obj , f_err , f_err , f_err , f_err ,
434
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
435
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
436
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
437
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
438
+ f_err , f_err , f_str , f_err , f_err , f_err , f_err , f_err ,
439
+ f_err , f_err , f_err , f_err , f_err , f_mns , f_err , f_err ,
440
+ f_zro , f_num , f_num , f_num , f_num , f_num , f_num , f_num ,
441
+ f_num , f_num , f_err , f_err , f_err , f_err , f_err , f_err ,
442
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
443
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
444
+ f_err , f_err , f_err , f_err , f_err , f_err , f_err , f_err ,
445
+ f_err , f_err , f_err , f_ary , f_err , f_err , f_err , f_err ,
446
+ f_err , f_err , f_err , f_err , f_err , f_err , f_fls , f_err ,
447
+ f_err , f_err , f_err , f_err , f_err , f_err , f_nul , f_err ,
448
+ f_err , f_err , f_err , f_err , f_tru , f_err , f_err , f_err ,
449
+ f_err , f_err , f_err , f_obj , f_err , f_err , f_err , f_err ,
431
450
}
432
451
dispatcher [0 ] = f_err
433
452
dispatcher .__index = function ()
0 commit comments