Skip to content

Commit 8b70dde

Browse files
committed
Fix trailing comment lexing issues
The comment collection/preservation code in `commentMetadataForCurrentToken` was incorrectly creating comment tokens for trailing comments in these two cases:
- `"# should not be a comment"`
- `%%# should not be a comment`

This happened because the trailing-comment lookahead was run after STRING_OPEN_QUOTE and EMBED_OPEN_DELIM tokens, even though a `#` immediately following either of them belongs to the string or embed tag being opened and cannot start a comment. The lookahead is now skipped for these two token types, and tests covering both cases have been added.
1 parent 9eed596 commit 8b70dde

File tree

2 files changed

+60
-17
lines changed

2 files changed

+60
-17
lines changed

src/commonMain/kotlin/org/kson/parser/Lexer.kt

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -629,26 +629,39 @@ class Lexer(source: String, gapFree: Boolean = false) {
629629
// reset our collection of seen comments to prepare to collect comments for the next token
630630
currentCommentLines = ArrayList()
631631

632-
// lex ahead a bit looking for any trailing comments
632+
// these tokens open comment-free constructs, so they cannot have trailing comments
633+
val acceptsTrailingComments = currentTokenType != STRING_OPEN_QUOTE
634+
&& currentTokenType != EMBED_OPEN_DELIM
635+
636+
// when appropriate, we lex ahead a bit looking for any trailing comments
633637
val trailingCommentTokens = ArrayList<Token>()
634-
// consume non-newline whitespace right after this token
635-
if (isInlineWhitespace(sourceScanner.peek())) {
636-
while (isInlineWhitespace(sourceScanner.peek())) {
637-
sourceScanner.advance()
638+
if (acceptsTrailingComments) {
639+
// consume non-newline whitespace right after this token
640+
if (isInlineWhitespace(sourceScanner.peek())) {
641+
while (isInlineWhitespace(sourceScanner.peek())) {
642+
sourceScanner.advance()
643+
}
644+
val whitespaceLexeme = sourceScanner.extractLexeme()
645+
trailingCommentTokens.add(
646+
Token(
647+
WHITESPACE,
648+
whitespaceLexeme,
649+
whitespaceLexeme.text,
650+
emptyList()
651+
)
652+
)
653+
}
654+
val trailingComment = if (sourceScanner.peek() == '#') {
655+
val commentToken = extractCommentToken()
656+
trailingCommentTokens.add(commentToken)
657+
commentToken.value
658+
} else {
659+
""
638660
}
639-
val whitespaceLexeme = sourceScanner.extractLexeme()
640-
trailingCommentTokens.add(Token(WHITESPACE, whitespaceLexeme, whitespaceLexeme.text, emptyList()))
641-
}
642-
val trailingComment = if (sourceScanner.peek() == '#') {
643-
val commentToken = extractCommentToken()
644-
trailingCommentTokens.add(commentToken)
645-
commentToken.value
646-
} else {
647-
""
648-
}
649661

650-
if (trailingComment.isNotBlank()) {
651-
commentsForToken.add(trailingComment)
662+
if (trailingComment.isNotBlank()) {
663+
commentsForToken.add(trailingComment)
664+
}
652665
}
653666
return CommentMetadata(commentsForToken, trailingCommentTokens)
654667
}

src/commonTest/kotlin/org/kson/parser/LexerTest.kt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,4 +842,34 @@ class LexerTest {
842842
val rightBracketToken = tokenList[4]
843843
assertEquals("# trailing list brace", rightBracketToken.comments[0])
844844
}
845+
846+
@Test
847+
fun testHashInString() {
848+
assertTokenizesTo(
849+
"'# not a comment' # yes a coment",
850+
listOf(STRING_OPEN_QUOTE, STRING, STRING_CLOSE_QUOTE, WHITESPACE, COMMENT),
851+
testGapFreeLexing = true
852+
)
853+
854+
assertTokenizesTo(
855+
"'also # not a comment'# yes a comment",
856+
listOf(STRING_OPEN_QUOTE, STRING, STRING_CLOSE_QUOTE, COMMENT),
857+
testGapFreeLexing = true
858+
)
859+
}
860+
861+
@Test
862+
fun testHashInEmbedTag() {
863+
assertTokenizesTo(
864+
"%%# should not be a comment",
865+
listOf(EMBED_OPEN_DELIM, EMBED_TAG),
866+
testGapFreeLexing = true
867+
)
868+
869+
assertTokenizesTo(
870+
"%%also # should not be a comment",
871+
listOf(EMBED_OPEN_DELIM, EMBED_TAG),
872+
testGapFreeLexing = true
873+
)
874+
}
845875
}

0 commit comments

Comments
 (0)