diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift index b38a07e1..b9693d97 100644 --- a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift +++ b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift @@ -331,7 +331,9 @@ extension Parser { /// /// Diagnoses on overflow /// - mutating func lexNumber(_ kind: RadixKind = .decimal) -> AST.Atom.Number? { + mutating func lexNumber( + _ kind: RadixKind = .decimal + ) -> AST.Atom.Number? { guard let str = tryEatPrefix(kind.characterFilter) else { return nil } @@ -342,6 +344,26 @@ extension Parser { return .init(i, at: str.location) } + /// Try to eat a quantification bound, such as appears in `/x{3,12}` + /// + /// Returns: `nil` if there's no number, otherwise the number + /// + /// Diagnoses on overflow. Currently, we will diagnose for any values over `UInt16.max` + /// + mutating func lexQuantBound() -> AST.Atom.Number? { + let kind = RadixKind.decimal + guard let str = tryEatPrefix(kind.characterFilter) else { + return nil + } + guard let i = UInt16(str.value, radix: kind.radix) else { + error(.numberOverflow(str.value), at: str.location) + return .init(nil, at: str.location) + } + + return .init(Int(i), at: str.location) + } + + /// Expect a number of a given `kind`, diagnosing if a number cannot be /// parsed. mutating func expectNumber(_ kind: RadixKind = .decimal) -> AST.Atom.Number { @@ -492,7 +514,7 @@ extension Parser { return p.tryEating { p in guard p.tryEat("{"), - let range = p.lexRange(trivia: &trivia), + let range = p.lexQuantRange(trivia: &trivia), p.tryEat("}") else { return nil } return range.value @@ -519,12 +541,14 @@ extension Parser { /// | ExpRange /// ExpRange -> '..<' | '...' /// | '..<' | '...' ? - mutating func lexRange(trivia: inout [AST.Trivia]) -> Located? { + mutating func lexQuantRange( + trivia: inout [AST.Trivia] + ) -> Located? { recordLoc { p in p.tryEating { p in if let t = p.lexWhitespace() { trivia.append(t) } - let lowerOpt = p.lexNumber() + let lowerOpt = p.lexQuantBound() if let t = p.lexWhitespace() { trivia.append(t) } @@ -546,7 +570,7 @@ extension Parser { if let t = p.lexWhitespace() { trivia.append(t) } - var upperOpt = p.lexNumber() + var upperOpt = p.lexQuantBound() if closedRange == false { // If we have an open range, the upper bound should be adjusted down. upperOpt?.value? -= 1 diff --git a/Tests/RegexTests/LexTests.swift b/Tests/RegexTests/LexTests.swift index 53775e66..ccfd18eb 100644 --- a/Tests/RegexTests/LexTests.swift +++ b/Tests/RegexTests/LexTests.swift @@ -63,6 +63,25 @@ extension RegexTests { _ = p.lexNumber() } + let invalidQuantBounds: Array = [ + "65536", // UInt16.max + 1 + "2147483646", // Int32.max - 1 + "9223372036854775806", // Int64.max - 1 + ] + + for invalidNum in invalidQuantBounds { + let regexes: Array = [ + "x{\(invalidNum)}", + "x{1,\(invalidNum)}", + "x{\(invalidNum),1}", + ] + for regex in regexes { + diagnose(regex, expecting: .numberOverflow(invalidNum)) { p in + _ = p.parse() + } + } + } + // TODO: want to dummy print out source ranges, etc, test that. } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index c52560d6..017005e5 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -751,6 +751,12 @@ extension RegexTests { firstMatchTest("(?U)a??", input: "a", match: "a") firstMatchTest("(?U)a??a", input: "aaa", match: "aa") + // Quantification syntax is somewhat dependent on the contents. + // In JS, PCRE2, Python, and some others, /x{-1}/ will be literally "x{-1}" + // Note that Java8 and Rust throw an (unhelpful) error + firstMatchTest("x{-1}", input: "x{-1}", match: "x{-1}") + firstMatchTest("x{-1}", input: "xax{-2}bx{-1}c", match: "x{-1}") + // TODO: After captures, easier to test these }