Fix trailing comment lexing issues

dmarcotte · dmarcotte · commit 8b70dde750ac · 2025-04-01T10:53:46.000-07:00
The comment collection/preservation code in
`commentMetadataForCurrentToken` was incorrectly lexing non-comment
content as a trailing comment token in these two cases:

- `"# should not be a comment"`
- `%%# should not be a comment`

This happened because the trailing-comment lookahead also ran after
STRING_OPEN_QUOTE and EMBED_OPEN_DELIM tokens, even though the constructs
they open cannot contain comments.  The lookahead is now skipped for
these two token types, and tests cover both cases.
diff --git a/src/commonMain/kotlin/org/kson/parser/Lexer.kt b/src/commonMain/kotlin/org/kson/parser/Lexer.kt
@@ -629,26 +629,39 @@ class Lexer(source: String, gapFree: Boolean = false) {
         // reset our collection of seen comments to prepare to collect comments for the next token
         currentCommentLines = ArrayList()
 
-        // lex ahead a bit looking for any trailing comments
+        // these tokens open comment free constructs, so they cannot have trailing comments
+        val acceptsTrailingComments = currentTokenType != STRING_OPEN_QUOTE
+                && currentTokenType != EMBED_OPEN_DELIM
+
+        // when appropriate, we lex ahead a bit looking for any trailing comments
         val trailingCommentTokens = ArrayList<Token>()
-        // consume non-newline whitespace right after this token
-        if (isInlineWhitespace(sourceScanner.peek())) {
-            while (isInlineWhitespace(sourceScanner.peek())) {
-                sourceScanner.advance()
+        if (acceptsTrailingComments) {
+            // consume non-newline whitespace right after this token
+            if (isInlineWhitespace(sourceScanner.peek())) {
+                while (isInlineWhitespace(sourceScanner.peek())) {
+                    sourceScanner.advance()
+                }
+                val whitespaceLexeme = sourceScanner.extractLexeme()
+                trailingCommentTokens.add(
+                    Token(
+                        WHITESPACE,
+                        whitespaceLexeme,
+                        whitespaceLexeme.text,
+                        emptyList()
+                    )
+                )
+            }
+            val trailingComment = if (sourceScanner.peek() == '#') {
+                val commentToken = extractCommentToken()
+                trailingCommentTokens.add(commentToken)
+                commentToken.value
+            } else {
+                ""
             }
-            val whitespaceLexeme = sourceScanner.extractLexeme()
-            trailingCommentTokens.add(Token(WHITESPACE, whitespaceLexeme, whitespaceLexeme.text, emptyList()))
-        }
-        val trailingComment = if (sourceScanner.peek() == '#') {
-            val commentToken = extractCommentToken()
-            trailingCommentTokens.add(commentToken)
-            commentToken.value
-        } else {
-            ""
-        }
 
-        if (trailingComment.isNotBlank()) {
-            commentsForToken.add(trailingComment)
+            if (trailingComment.isNotBlank()) {
+                commentsForToken.add(trailingComment)
+            }
         }
         return CommentMetadata(commentsForToken, trailingCommentTokens)
     }
diff --git a/src/commonTest/kotlin/org/kson/parser/LexerTest.kt b/src/commonTest/kotlin/org/kson/parser/LexerTest.kt
@@ -842,4 +842,34 @@ class LexerTest {
         val rightBracketToken = tokenList[4]
         assertEquals("# trailing list brace", rightBracketToken.comments[0])
     }
+
+    @Test
+    fun testHashInString() {
+        assertTokenizesTo(
+            "'# not a comment' # yes a coment",
+            listOf(STRING_OPEN_QUOTE, STRING, STRING_CLOSE_QUOTE, WHITESPACE, COMMENT),
+            testGapFreeLexing = true
+        )
+
+        assertTokenizesTo(
+            "'also # not a comment'# yes a comment",
+            listOf(STRING_OPEN_QUOTE, STRING, STRING_CLOSE_QUOTE, COMMENT),
+            testGapFreeLexing = true
+        )
+    }
+
+    @Test
+    fun testHashInEmbedTag() {
+        assertTokenizesTo(
+            "%%# should not be a comment",
+            listOf(EMBED_OPEN_DELIM, EMBED_TAG),
+            testGapFreeLexing = true
+        )
+
+        assertTokenizesTo(
+            "%%also # should not be a comment",
+            listOf(EMBED_OPEN_DELIM, EMBED_TAG),
+            testGapFreeLexing = true
+        )
+    }
 }