@@ -80,10 +80,6 @@ def initialize
80
80
@binary_input = nil
81
81
@current_token = nil
82
82
@debug = false
83
- @input = nil
84
- @input_encoding = nil
85
- @line = 0
86
- @line_pos = 0
87
83
@s = nil
88
84
@tokens = [ ]
89
85
end
@@ -319,13 +315,6 @@ def build_verbatim margin
319
315
verbatim
320
316
end
321
317
322
- ##
323
- # The character offset for the input string at the given +byte_offset+
324
-
325
- def char_pos byte_offset
326
- @input . byteslice ( 0 , byte_offset ) . length
327
- end
328
-
329
318
##
330
319
# Pulls the next token from the stream.
331
320
@@ -424,15 +413,54 @@ def peek_token
424
413
token
425
414
end
426
415
416
##
# A simple wrapper of StringScanner that is aware of the current column and
# lineno.
#
# The column is counted in characters (not bytes) consumed since the last
# call to #newline!.  The line number only changes when #newline! is called,
# so the caller must report every line break it consumes.

class MyStringScanner
  ##
  # Wraps +input+ in a StringScanner, starting at column 0 of line 0.

  def initialize(input)
    @line = @column = 0
    @s = StringScanner.new input
  end

  ##
  # Tries to match +re+ at the current scan position.
  #
  # On a match the column advances by the character length of the matched
  # text and the matched string is returned.  Otherwise the column is left
  # untouched and nil is returned.

  def scan(re)
    ret = @s.scan(re)
    @column += ret.length if ret
    ret
  end

  ##
  # Pushes the string +s+ back onto the input.
  #
  # The underlying scanner position is a byte offset, so it is rewound by
  # the byte size of +s+; the column is rewound by its character length.

  def unscan(s)
    @s.pos -= s.bytesize
    @column -= s.length
  end

  ##
  # The current position as a <tt>[column, line]</tt> pair.

  def pos
    [@column, @line]
  end

  ##
  # Records a line break: resets the column to 0 and increments the line.

  def newline!
    @column = 0
    @line += 1
  end

  ##
  # Delegates to StringScanner#eos? on the wrapped scanner.

  def eos?
    @s.eos?
  end

  ##
  # Delegates to StringScanner#matched on the wrapped scanner.

  def matched
    @s.matched
  end

  ##
  # Delegates to StringScanner#[] on the wrapped scanner, returning capture
  # group +i+ of the last match.

  def [](i)
    @s[i]
  end
end
458
+
427
459
##
# Creates the scanner used for tokenizing: a MyStringScanner wrapping +input+,
# stored in <tt>@s</tt>.  Returns the new scanner.

def setup_scanner input
  @s = MyStringScanner.new input
end
437
465
438
466
##
@@ -467,31 +495,30 @@ def tokenize input
467
495
@tokens << case
468
496
# [CR]LF => :NEWLINE
469
497
when @s . scan ( /\r ?\n / ) then
470
- token = [ :NEWLINE , @s . matched , *token_pos ( pos ) ]
471
- @line_pos = char_pos @s . pos
472
- @line += 1
498
+ token = [ :NEWLINE , @s . matched , *pos ]
499
+ @s . newline!
473
500
token
474
501
# === text => :HEADER then :TEXT
475
502
when @s . scan ( /(=+)(\s *)/ ) then
476
503
level = @s [ 1 ] . length
477
- header = [ :HEADER , level , *token_pos ( pos ) ]
504
+ header = [ :HEADER , level , *pos ]
478
505
479
506
if @s [ 2 ] =~ /^\r ?\n / then
480
- @s . pos -= @s [ 2 ] . length
507
+ @s . unscan ( @s [ 2 ] )
481
508
header
482
509
else
483
510
pos = @s . pos
484
511
@s . scan ( /.*/ )
485
512
@tokens << header
486
- [ :TEXT , @s . matched . sub ( /\r $/ , '' ) , *token_pos ( pos ) ]
513
+ [ :TEXT , @s . matched . sub ( /\r $/ , '' ) , *pos ]
487
514
end
488
515
# --- (at least 3) and nothing else on the line => :RULE
489
516
when @s . scan ( /(-{3,}) *\r ?$/ ) then
490
- [ :RULE , @s [ 1 ] . length - 2 , *token_pos ( pos ) ]
517
+ [ :RULE , @s [ 1 ] . length - 2 , *pos ]
491
518
# * or - followed by white space and text => :BULLET
492
519
when @s . scan ( /([*-]) +(\S )/ ) then
493
- @s . pos -= @s [ 2 ] . bytesize # unget \S
494
- [ :BULLET , @s [ 1 ] , *token_pos ( pos ) ]
520
+ @s . unscan ( @s [ 2 ] )
521
+ [ :BULLET , @s [ 1 ] , *pos ]
495
522
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
496
523
when @s . scan ( /([a-z]|\d +)\. +(\S )/i ) then
497
524
# FIXME if tab(s), the column will be wrong
@@ -500,7 +527,7 @@ def tokenize input
500
527
# before (and provide a check for that at least in debug
501
528
# mode)
502
529
list_label = @s [ 1 ]
503
- @s . pos -= @s [ 2 ] . bytesize # unget \S
530
+ @s . unscan ( @s [ 2 ] )
504
531
list_type =
505
532
case list_label
506
533
when /[a-z]/ then :LALPHA
@@ -509,24 +536,24 @@ def tokenize input
509
536
else
510
537
raise ParseError , "BUG token #{ list_label } "
511
538
end
512
- [ list_type , list_label , *token_pos ( pos ) ]
539
+ [ list_type , list_label , *pos ]
513
540
# [text] followed by spaces or end of line => :LABEL
514
541
when @s . scan ( /\[ (.*?)\] ( +|\r ?$)/ ) then
515
- [ :LABEL , @s [ 1 ] , *token_pos ( pos ) ]
542
+ [ :LABEL , @s [ 1 ] , *pos ]
516
543
# text:: followed by spaces or end of line => :NOTE
517
544
when @s . scan ( /(.*?)::( +|\r ?$)/ ) then
518
- [ :NOTE , @s [ 1 ] , *token_pos ( pos ) ]
545
+ [ :NOTE , @s [ 1 ] , *pos ]
519
546
# >>> followed by end of line => :BLOCKQUOTE
520
547
when @s . scan ( />>> *(\w +)?$/ ) then
521
- [ :BLOCKQUOTE , @s [ 1 ] , *token_pos ( pos ) ]
548
+ [ :BLOCKQUOTE , @s [ 1 ] , *pos ]
522
549
# anything else: :TEXT
523
550
else
524
551
@s . scan ( /(.*?)( )?\r ?$/ )
525
- token = [ :TEXT , @s [ 1 ] , *token_pos ( pos ) ]
552
+ token = [ :TEXT , @s [ 1 ] , *pos ]
526
553
527
554
if @s [ 2 ] then
528
555
@tokens << token
529
- [ :BREAK , @s [ 2 ] , * token_pos ( pos + @s [ 1 ] . length ) ]
556
+ [ :BREAK , @s [ 2 ] , pos [ 0 ] + @s [ 1 ] . length , pos [ 1 ] ]
530
557
else
531
558
token
532
559
end
@@ -536,16 +563,6 @@ def tokenize input
536
563
self
537
564
end
538
565
539
- ##
540
- # Calculates the column (by character) and line of the current token based
541
- # on +byte_offset+.
542
-
543
- def token_pos byte_offset
544
- offset = char_pos byte_offset
545
-
546
- [ offset - @line_pos , @line ]
547
- end
548
-
549
566
##
550
567
# Returns the current token to the token stream
551
568
0 commit comments