fix(bundle): Only accept space, CRLF & LF as syntactical whitespace (#651)

eemeli · web-flow · commit 13710cc6a5f6 · 2026-01-05T14:25:49.000+02:00
diff --git a/fluent-bundle/src/resource.ts b/fluent-bundle/src/resource.ts
@@ -19,11 +19,11 @@ import {
 
 // This regex is used to iterate through the beginnings of messages and terms.
 // With the /m flag, the ^ matches at the beginning of every line.
-const RE_MESSAGE_START = /^(-?[a-zA-Z][\w-]*) *= */gm;
+const RE_MESSAGE_START = /^(?<!\r)(-?[a-zA-Z][\w-]*) *= */gm;
 
 // Both Attributes and Variants are parsed in while loops. These regexes are
 // used to break out of them.
-const RE_ATTRIBUTE_START = /\.([a-zA-Z][\w-]*) *= */y;
+const RE_ATTRIBUTE_START = /(?<=\n *)\.([a-zA-Z][\w-]*) *= */y;
 const RE_VARIANT_START = /\*?\[/y;
 
 const RE_NUMBER_LITERAL = /(-?[0-9]+(?:\.([0-9]+))?)/y;
@@ -37,8 +37,8 @@ const RE_FUNCTION_NAME = /^[A-Z][A-Z0-9_-]*$/;
 // if the next line is indented. For StringLiterals they are: \ (starts an
 // escape sequence), " (ends the literal), and line breaks which are not allowed
 // in StringLiterals. Note that string runs may be empty; text runs may not.
-const RE_TEXT_RUN = /([^{}\n\r]+)/y;
-const RE_STRING_RUN = /([^\\"\n\r]*)/y;
+const RE_TEXT_RUN = /((?:[^{}\n\r]|\r(?!\n))+)/y;
+const RE_STRING_RUN = /((?:[^\\"\n\r]|\r(?!\n))*)/y;
 
 // Escape sequences.
 const RE_STRING_ESCAPE = /\\([\\"])/y;
@@ -53,17 +53,17 @@ const RE_BLANK_LINES = / *\r?\n/g;
 const RE_INDENT = /( *)$/;
 
 // Common tokens.
-const TOKEN_BRACE_OPEN = /{\s*/y;
-const TOKEN_BRACE_CLOSE = /\s*}/y;
-const TOKEN_BRACKET_OPEN = /\[\s*/y;
-const TOKEN_BRACKET_CLOSE = /\s*] */y;
-const TOKEN_PAREN_OPEN = /\s*\(\s*/y;
-const TOKEN_ARROW = /\s*->\s*/y;
-const TOKEN_COLON = /\s*:\s*/y;
+const TOKEN_BRACE_OPEN = /{(?: |\r?\n)*/y;
+const TOKEN_BRACE_CLOSE = /(?: |\r?\n)*}/y;
+const TOKEN_BRACKET_OPEN = /\[(?: |\r?\n)*/y;
+const TOKEN_BRACKET_CLOSE = /(?: |\r?\n)*] */y;
+const TOKEN_PAREN_OPEN = /(?: |\r?\n)*\((?: |\r?\n)*/y;
+const TOKEN_ARROW = /(?: |\r?\n)*->(?: |\r?\n)*/y;
+const TOKEN_COLON = /(?: |\r?\n)*:(?: |\r?\n)*/y;
 // Note the optional comma. As a deviation from the Fluent EBNF, the parser
 // doesn't enforce commas between call arguments.
-const TOKEN_COMMA = /\s*,?\s*/y;
-const TOKEN_BLANK = /\s+/y;
+const TOKEN_COMMA = /(?: |\r?\n)*,?(?: |\r?\n)*/y;
+const TOKEN_BLANK = /(?: |\r?\n)+/y;
 
 /**
  * Fluent Resource is a structure storing parsed localization entries.
diff --git a/fluent-bundle/test/fixtures_reference/cr.json b/fluent-bundle/test/fixtures_reference/cr.json
@@ -1,39 +1,3 @@
 {
-    "body": [
-        {
-            "id": "err01",
-            "value": "Value 01",
-            "attributes": {}
-        },
-        {
-            "id": "err02",
-            "value": "Value 02",
-            "attributes": {}
-        },
-        {
-            "id": "err03",
-            "value": [
-                "\r\r",
-                "Value 03",
-                "\r",
-                "Continued"
-            ],
-            "attributes": {
-                "title": "Title"
-            }
-        },
-        {
-            "id": "err05",
-            "value": [
-                {
-                    "type": "select",
-                    "selector": {
-                        "type": "var",
-                        "name": "sel"
-                    }
-                }
-            ],
-            "attributes": {}
-        }
-    ]
+    "body": []
 }
diff --git a/fluent-bundle/test/fixtures_reference/tab.json b/fluent-bundle/test/fixtures_reference/tab.json
@@ -23,6 +23,13 @@
             "attributes": {
                 "attr": "\t\t"
             }
+        },
+        {
+            "id": "key07",
+            "value": null,
+            "attributes": {
+                "good": "Spaces as indent"
+            }
         }
     ]
 }
diff --git a/fluent-react/test/use_localization.test.js b/fluent-react/test/use_localization.test.js
@@ -27,7 +27,7 @@ describe("useLocalization", () => {
     const bundle = new FluentBundle("en");
     bundle.addResource(
       new FluentResource(
-        "foo = FOO\nbar = BAR<elem>BAZ</elem>\n\t.title = QUX\n"
+        "foo = FOO\nbar = BAR<elem>BAZ</elem>\n  .title = QUX\n"
       )
     );
     return bundle;
diff --git a/fluent-syntax/makefile b/fluent-syntax/makefile
@@ -2,13 +2,13 @@ export SHELL := /bin/bash
 
 OK := \033[32;01m✓\033[0m
 
-STRUCTURE_FTL := $(wildcard test/fixtures_structure/*.ftl)
-STRUCTURE_AST := $(STRUCTURE_FTL:.ftl=.json)
+FIXTURES_FTL := $(wildcard test/fixtures_structure/*.ftl) $(wildcard test/fixtures_reference/*.ftl)
+FIXTURES_AST := $(FIXTURES_FTL:.ftl=.json)
 
 .PHONY: fixtures
-fixtures: $(STRUCTURE_AST)
+fixtures: $(FIXTURES_AST)
 
-.PHONY: $(STRUCTURE_AST)
-$(STRUCTURE_AST): test/fixtures_structure/%.json: test/fixtures_structure/%.ftl
-	@../tools/parse.js --silent --with-spans $< > $@
+.PHONY: $(FIXTURES_AST)
+$(FIXTURES_AST): test/fixtures_%.json: test/fixtures_%.ftl
+	@../tools/parse.mjs --silent --with-spans $< > $@
 	@echo -e " $(OK) $@"
diff --git a/fluent-syntax/test/fixtures_reference/tab.ftl b/fluent-syntax/test/fixtures_reference/tab.ftl
@@ -19,3 +19,8 @@ key05 =
 # OK (attribute value is two tabs)
 key06 =
   .attr = 		
+
+# Partial Error (tab is not a valid indent)
+key07 =
+  .good = Spaces as indent
+	.bad = Tab as indent
diff --git a/fluent-syntax/test/fixtures_reference/tab.json b/fluent-syntax/test/fixtures_reference/tab.json
@@ -116,6 +116,41 @@
                 "type": "Comment",
                 "content": "OK (attribute value is two tabs)"
             }
+        },
+        {
+            "type": "Message",
+            "id": {
+                "type": "Identifier",
+                "name": "key07"
+            },
+            "value": null,
+            "attributes": [
+                {
+                    "type": "Attribute",
+                    "id": {
+                        "type": "Identifier",
+                        "name": "good"
+                    },
+                    "value": {
+                        "type": "Pattern",
+                        "elements": [
+                            {
+                                "type": "TextElement",
+                                "value": "Spaces as indent"
+                            }
+                        ]
+                    }
+                }
+            ],
+            "comment": {
+                "type": "Comment",
+                "content": "Partial Error (tab is not a valid indent)"
+            }
+        },
+        {
+            "type": "Junk",
+            "annotations": [],
+            "content": "\t.bad = Tab as indent\n"
         }
     ]
 }

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,13 @@`
`23`	`23`	`"attributes": {`
`24`	`24`	`"attr": "\t\t"`
`25`	`25`	`}`
	`26`	`+ },`
	`27`	`+ {`
	`28`	`+ "id": "key07",`
	`29`	`+ "value": null,`
	`30`	`+ "attributes": {`
	`31`	`+ "good": "Spaces as indent"`
	`32`	`+ }`
`26`	`33`	`}`
`27`	`34`	`]`
`28`	`35`	`}`
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@ describe("useLocalization", () => {`
`27`	`27`	`const bundle = new FluentBundle("en");`
`28`	`28`	`bundle.addResource(`
`29`	`29`	`new FluentResource(`
`30`		`- "foo = FOO\nbar = BAR<elem>BAZ</elem>\n\t.title = QUX\n"`
	`30`	`+ "foo = FOO\nbar = BAR<elem>BAZ</elem>\n  .title = QUX\n"`
`31`	`31`	`)`
`32`	`32`	`);`
`33`	`33`	`return bundle;`