diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index ee8e487dd96ad9..3ed8102ed49d1f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -1606,22 +1606,33 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax) // Coalescing a loop with its same type case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy when nextNode.Kind == currentNode.Kind && currentNode.Ch == nextNode.Ch: case RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy when nextNode.Kind == currentNode.Kind && currentNode.Str == nextNode.Str: - if (CanCombineCounts(currentNode.M, currentNode.N, nextNode.M, nextNode.N)) + if (nextNode.M > 0 && + currentNode.Kind is RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Setloopatomic) { - currentNode.M += nextNode.M; - if (currentNode.N != int.MaxValue) - { - currentNode.N = nextNode.N == int.MaxValue ? int.MaxValue : currentNode.N + nextNode.N; - } - next++; - continue; + // Atomic loops can only be combined if the second loop has no lower bound, as if it has a lower bound, + // combining them changes behavior. Uncombined, the first loop can consume all matching items; + // the second loop might then not be able to meet its minimum and fail. But if they're combined, the combined + // minimum of the sole loop could now be met, introducing matches where there shouldn't have been any. + break; } - break; + + if (!CanCombineCounts(currentNode.M, currentNode.N, nextNode.M, nextNode.N)) + { + break; + } + + currentNode.M += nextNode.M; + if (currentNode.N != int.MaxValue) + { + currentNode.N = nextNode.N == int.MaxValue ? int.MaxValue : currentNode.N + nextNode.N; + } + next++; + continue; // Coalescing a loop with an additional item of the same type - case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.One && currentNode.Ch == nextNode.Ch: - case RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy when nextNode.Kind == RegexNodeKind.Notone && currentNode.Ch == nextNode.Ch: - case RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy when nextNode.Kind == RegexNodeKind.Set && currentNode.Str == nextNode.Str: + case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.One && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Notoneloop or RegexNodeKind.Notonelazy when nextNode.Kind == RegexNodeKind.Notone && currentNode.Ch == nextNode.Ch: + case RegexNodeKind.Setloop or RegexNodeKind.Setlazy when nextNode.Kind == RegexNodeKind.Set && currentNode.Str == nextNode.Str: if (CanCombineCounts(currentNode.M, currentNode.N, 1, 1)) { currentNode.M++; @@ -1635,7 +1646,7 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax) break; // Coalescing a loop with a subsequent string - case RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]: + case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]: { // Determine how many of the multi's characters can be combined. // We already checked for the first, so we know it's at least one. @@ -2056,15 +2067,15 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i case RegexNodeKind.Multi when node.Ch != subsequent.Str![0]: case RegexNodeKind.End: case RegexNodeKind.EndZ or RegexNodeKind.Eol when node.Ch != '\n': - case RegexNodeKind.Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch): - case RegexNodeKind.NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch): - case RegexNodeKind.ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch): - case RegexNodeKind.NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch): return true; case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && node.Ch != subsequent.Ch: case RegexNodeKind.Notonelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic when subsequent.M == 0 && node.Ch == subsequent.Ch: case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!): + case RegexNodeKind.Boundary when RegexCharClass.IsBoundaryWordChar(node.Ch): + case RegexNodeKind.NonBoundary when !RegexCharClass.IsBoundaryWordChar(node.Ch): + case RegexNodeKind.ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch): + case RegexNodeKind.NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch): // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. break; @@ -2103,14 +2114,14 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i case RegexNodeKind.Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!): case RegexNodeKind.End: case RegexNodeKind.EndZ or RegexNodeKind.Eol when !RegexCharClass.CharInClass('\n', node.Str!): - case RegexNodeKind.Boundary when node.Str is RegexCharClass.WordClass or RegexCharClass.DigitClass: - case RegexNodeKind.NonBoundary when node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass: - case RegexNodeKind.ECMABoundary when node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass: - case RegexNodeKind.NonECMABoundary when node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass: return true; case RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic when subsequent.M == 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!): case RegexNodeKind.Setlazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic when subsequent.M == 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!): + case RegexNodeKind.Boundary when node.Str is RegexCharClass.WordClass or RegexCharClass.DigitClass: + case RegexNodeKind.NonBoundary when node.Str is RegexCharClass.NotWordClass or RegexCharClass.NotDigitClass: + case RegexNodeKind.ECMABoundary when node.Str is RegexCharClass.ECMAWordClass or RegexCharClass.ECMADigitClass: + case RegexNodeKind.NonECMABoundary when node.Str is RegexCharClass.NotECMAWordClass or RegexCharClass.NotDigitClass: // The loop can be made atomic based on this subsequent node, but we'll need to evaluate the next one as well. break; @@ -2125,8 +2136,6 @@ private static bool CanBeMadeAtomic(RegexNode node, RegexNode subsequent, bool i // We only get here if the node could be made atomic based on subsequent but subsequent has a lower bound of zero // and thus we need to move subsequent to be the next node in sequence and loop around to try again. - Debug.Assert(subsequent.Kind is RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or RegexNodeKind.Onelazy or RegexNodeKind.Notoneloop or RegexNodeKind.Notoneloopatomic or RegexNodeKind.Notonelazy or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy); - Debug.Assert(subsequent.M == 0); if (!iterateNullableSubsequent) { return false; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 02182bd87ede6e..9f73f9f1c30e88 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -149,6 +149,15 @@ public static IEnumerable Match_MemberData() yield return (Case("(?>[^z]+)z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz"); yield return (Case("(?>(?>[^z]+))z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz"); yield return (Case("(?>[^z]*)z123"), "zzzzxyxyxyz123", options, 4, 10, true, "xyxyxyz123"); + yield return (Case("(?>a*)a"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>a*)a+"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>a+)a+"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>.*)."), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>.*).+"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>.+).+"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>\\w*)\\w"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>\\w*)\\w+"), "aaa", options, 0, 3, false, ""); + yield return (Case("(?>\\w+)\\w+"), "aaa", options, 0, 3, false, ""); yield return (Case("(?>[^12]+)1"), "121231", options, 0, 6, true, "31"); yield return (Case("(?>[^123]+)1"), "12312341", options, 0, 8, true, "41"); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs index 744fd174351171..bd0b1eab45ce37 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs @@ -276,6 +276,22 @@ public static IEnumerable Matches_TestData() } }; + yield return new object[] + { + engine, + @"\w*\b\w+", "abc def ghij kl m nop qrstuv", RegexOptions.None, + new[] + { + new CaptureData("abc", 0, 3), + new CaptureData("def", 4, 3), + new CaptureData("ghij", 8, 4), + new CaptureData("kl", 13, 2), + new CaptureData("m", 16, 1), + new CaptureData("nop", 18, 3), + new CaptureData("qrstuv", 22, 6), + } + }; + if (!PlatformDetection.IsNetFramework) { // .NET Framework missing fix in https://github.com/dotnet/runtime/pull/1075 @@ -294,6 +310,20 @@ public static IEnumerable Matches_TestData() if (!RegexHelpers.IsNonBacktracking(engine)) { + yield return new object[] + { + engine, + @"(\b(?!ab|nop)\w*\b)\w+", "abc def ghij kl m nop qrstuv", RegexOptions.None, + new[] + { + new CaptureData("def", 4, 3), + new CaptureData("ghij", 8, 4), + new CaptureData("kl", 13, 2), + new CaptureData("m", 16, 1), + new CaptureData("qrstuv", 22, 6), + } + }; + yield return new object[] { engine, diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs index b21b26071bc2e6..dbb22cf5026485 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs @@ -50,26 +50,14 @@ public class RegexReductionTests // Two atomic one loops [InlineData("(?>a*)(?>a*)", "(?>a*)")] [InlineData("(?>a*)(?>(?:a*))", "(?>a*)")] - [InlineData("(?>a*)(?>a+)", "(?>a+)")] [InlineData("(?>a*)(?>a?)", "(?>a*)")] - [InlineData("(?>a*)(?>a{1,3})", "(?>a+)")] [InlineData("(?>a+)(?>a*)", "(?>a+)")] - [InlineData("(?>a+)(?>a+)", "(?>a{2,})")] [InlineData("(?>a+)(?>a?)", "(?>a+)")] - [InlineData("(?>a+)(?>a{1,3})", "(?>a{2,})")] [InlineData("(?>a?)(?>a*)", "(?>a*)")] - [InlineData("(?>a?)(?>a+)", "(?>a+)")] [InlineData("(?>a?)(?>a?)", "(?>a{0,2})")] - [InlineData("(?>a?)(?>a{1,3})", "(?>a{1,4})")] [InlineData("(?>a{1,3})(?>a*)", "(?>a+)")] - [InlineData("(?>a{1,3})(?>a+)", "(?>a{2,})")] [InlineData("(?>a{1,3})(?>a?)", "(?>a{1,4})")] - [InlineData("(?>a{1,3})(?>a{1,3})", "(?>a{2,6})")] - // Atomic one loop and one - [InlineData("(?>a*)a", "(?>a+)")] - [InlineData("(?>a+)a", "(?>a{2,})")] - [InlineData("(?>a?)a", "(?>a{1,2})")] - [InlineData("(?>a{1,3})a", "(?>a{2,4})")] + // One and atomic one loop [InlineData("a(?>a*)", "(?>a+)")] [InlineData("a(?>a+)", "(?>a{2,})")] [InlineData("a(?>a?)", "(?>a{1,2})")] @@ -136,21 +124,13 @@ public class RegexReductionTests [InlineData("[^a]{1,3}?[^a]{1,3}?", "[^a]{2,6}?")] // Two atomic notone loops [InlineData("(?>[^a]*)(?>[^a]*)", "(?>[^a]*)")] - [InlineData("(?>[^a]*)(?>[^a]+)", "(?>[^a]+)")] [InlineData("(?>[^a]*)(?>[^a]?)", "(?>[^a]*)")] - [InlineData("(?>[^a]*)(?>[^a]{1,3})", "(?>[^a]+)")] [InlineData("(?>[^a]+)(?>[^a]*)", "(?>[^a]+)")] - [InlineData("(?>[^a]+)(?>[^a]+)", "(?>[^a]{2,})")] [InlineData("(?>[^a]+)(?>[^a]?)", "(?>[^a]+)")] - [InlineData("(?>[^a]+)(?>[^a]{1,3})", "(?>[^a]{2,})")] [InlineData("(?>[^a]?)(?>[^a]*)", "(?>[^a]*)")] - [InlineData("(?>[^a]?)(?>[^a]+)", "(?>[^a]+)")] [InlineData("(?>[^a]?)(?>[^a]?)", "(?>[^a]{0,2})")] - [InlineData("(?>[^a]?)(?>[^a]{1,3})", "(?>[^a]{1,4})")] [InlineData("(?>[^a]{1,3})(?>[^a]*)", "(?>[^a]+)")] - [InlineData("(?>[^a]{1,3})(?>[^a]+)", "(?>[^a]{2,})")] [InlineData("(?>[^a]{1,3})(?>[^a]?)", "(?>[^a]{1,4})")] - [InlineData("(?>[^a]{1,3})(?>[^a]{1,3})", "(?>[^a]{2,6})")] // Greedy notone loop and notone [InlineData("[^a]*[^a]", "[^a]+")] [InlineData("[^a]+[^a]", "[^a]{2,}")] @@ -169,11 +149,7 @@ public class RegexReductionTests [InlineData("[^a][^a]+?", "[^a]{2,}?")] [InlineData("[^a][^a]??", "[^a]{1,2}?")] [InlineData("[^a][^a]{1,3}?", "[^a]{2,4}?")] - // Atomic notone loop and notone - [InlineData("(?>[^a]*)[^a]", "(?>[^a]+)")] - [InlineData("(?>[^a]+)[^a]", "(?>[^a]{2,})")] - [InlineData("(?>[^a]?)[^a]", "(?>[^a]{1,2})")] - [InlineData("(?>[^a]{1,3})[^a]", "(?>[^a]{2,4})")] + // Notone and atomic notone loop [InlineData("[^a](?>[^a]*)", "(?>[^a]+)")] [InlineData("[^a](?>[^a]+)", "(?>[^a]{2,})")] [InlineData("[^a](?>[^a]?)", "(?>[^a]{1,2})")] @@ -206,11 +182,7 @@ public class RegexReductionTests [InlineData("[0-9][0-9]+", "[0-9]{2,}")] [InlineData("[0-9][0-9]?", "[0-9]{1,2}")] [InlineData("[0-9][0-9]{1,3}", "[0-9]{2,4}")] - // Atomic set loop and set - [InlineData("(?>[0-9]*)[0-9]", "(?>[0-9]+)")] - [InlineData("(?>[0-9]+)[0-9]", "(?>[0-9]{2,})")] - [InlineData("(?>[0-9]?)[0-9]", "(?>[0-9]{1,2})")] - [InlineData("(?>[0-9]{1,3})[0-9]", "(?>[0-9]{2,4})")] + // Set and atomic set loop [InlineData("[0-9](?>[0-9]*)", "(?>[0-9]+)")] [InlineData("[0-9](?>[0-9]+)", "(?>[0-9]{2,})")] [InlineData("[0-9](?>[0-9]?)", "(?>[0-9]{1,2})")] @@ -234,21 +206,13 @@ public class RegexReductionTests [InlineData("[0-9]{1,3}?[0-9]{1,3}?", "[0-9]{2,6}?")] // Two atomic set loops [InlineData("(?>[0-9]*)(?>[0-9]*)", "(?>[0-9]*)")] - [InlineData("(?>[0-9]*)(?>[0-9]+)", "(?>[0-9]+)")] [InlineData("(?>[0-9]*)(?>[0-9]?)", "(?>[0-9]*)")] - [InlineData("(?>[0-9]*)(?>[0-9]{1,3})", "(?>[0-9]+)")] [InlineData("(?>[0-9]+)(?>[0-9]*)", "(?>[0-9]+)")] - [InlineData("(?>[0-9]+)(?>[0-9]+)", "(?>[0-9]{2,})")] [InlineData("(?>[0-9]+)(?>[0-9]?)", "(?>[0-9]+)")] - [InlineData("(?>[0-9]+)(?>[0-9]{1,3})", "(?>[0-9]{2,})")] [InlineData("(?>[0-9]?)(?>[0-9]*)", "(?>[0-9]*)")] - [InlineData("(?>[0-9]?)(?>[0-9]+)", "(?>[0-9]+)")] [InlineData("(?>[0-9]?)(?>[0-9]?)", "(?>[0-9]{0,2})")] - [InlineData("(?>[0-9]?)(?>[0-9]{1,3})", "(?>[0-9]{1,4})")] [InlineData("(?>[0-9]{1,3})(?>[0-9]*)", "(?>[0-9]+)")] - [InlineData("(?>[0-9]{1,3})(?>[0-9]+)", "(?>[0-9]{2,})")] [InlineData("(?>[0-9]{1,3})(?>[0-9]?)", "(?>[0-9]{1,4})")] - [InlineData("(?>[0-9]{1,3})(?>[0-9]{1,3})", "(?>[0-9]{2,6})")] // Lazy set loop and set [InlineData("[0-9]*?[0-9]", "[0-9]+?")] [InlineData("[0-9]+?[0-9]", "[0-9]{2,}?")] @@ -381,6 +345,8 @@ public class RegexReductionTests [InlineData("(?:w*)+\\.", "(?>w*)+\\.")] [InlineData("(a[bcd]e*)*fg", "(a[bcd](?>e*))*fg")] [InlineData("(\\w[bcd]\\s*)*fg", "(\\w[bcd](?>\\s*))*fg")] + [InlineData(@"\b(\w+)\b", @"\b((?>\w+))\b")] + [InlineData(@"\b(?:\w+)\b ", @"\b(?>\w+)\b ")] // Nothing handling [InlineData(@"\wabc(?!)def", "(?!)")] [InlineData(@"\wabc(?!)def|ghi(?!)", "(?!)")] @@ -423,6 +389,42 @@ public void PatternsReduceIdentically(string actual, string expected) [InlineData("a*?a*", "a*")] [InlineData("a*[^a]*", "a*")] [InlineData("[^a]*a*", "a*")] + [InlineData("(?>a*)(?>a+)", "(?>a+)")] + [InlineData("(?>a*)(?>a{1,3})", "(?>a+)")] + [InlineData("(?>a+)(?>a+)", "(?>a{2,})")] + [InlineData("(?>a+)(?>a{1,3})", "(?>a{2,})")] + [InlineData("(?>a?)(?>a+)", "(?>a+)")] + [InlineData("(?>a?)(?>a{1,3})", "(?>a{1,4})")] + [InlineData("(?>a{1,3})(?>a+)", "(?>a{2,})")] + [InlineData("(?>a{1,3})(?>a{1,3})", "(?>a{2,6})")] + [InlineData("(?>[^a]*)(?>[^a]+)", "(?>[^a]+)")] + [InlineData("(?>[^a]*)(?>[^a]{1,3})", "(?>[^a]+)")] + [InlineData("(?>[^a]+)(?>[^a]+)", "(?>[^a]{2,})")] + [InlineData("(?>[^a]+)(?>[^a]{1,3})", "(?>[^a]{2,})")] + [InlineData("(?>[^a]?)(?>[^a]+)", "(?>[^a]+)")] + [InlineData("(?>[^a]?)(?>[^a]{1,3})", "(?>[^a]{1,4})")] + [InlineData("(?>[^a]{1,3})(?>[^a]+)", "(?>[^a]{2,})")] + [InlineData("(?>[^a]{1,3})(?>[^a]{1,3})", "(?>[^a]{2,6})")] + [InlineData("(?>[0-9]*)(?>[0-9]+)", "(?>[0-9]+)")] + [InlineData("(?>[0-9]*)(?>[0-9]{1,3})", "(?>[0-9]+)")] + [InlineData("(?>[0-9]+)(?>[0-9]+)", "(?>[0-9]{2,})")] + [InlineData("(?>[0-9]+)(?>[0-9]{1,3})", "(?>[0-9]{2,})")] + [InlineData("(?>[0-9]?)(?>[0-9]+)", "(?>[0-9]+)")] + [InlineData("(?>[0-9]?)(?>[0-9]{1,3})", "(?>[0-9]{1,4})")] + [InlineData("(?>[0-9]{1,3})(?>[0-9]+)", "(?>[0-9]{2,})")] + [InlineData("(?>[0-9]{1,3})(?>[0-9]{1,3})", "(?>[0-9]{2,6})")] + [InlineData("(?>a*)a", "(?>a+)")] + [InlineData("(?>a+)a", "(?>a{2,})")] + [InlineData("(?>a?)a", "(?>a{1,2})")] + [InlineData("(?>a{1,3})a", "(?>a{2,4})")] + [InlineData("(?>[^a]*)[^a]", "(?>[^a]+)")] + [InlineData("(?>[^a]+)[^a]", "(?>[^a]{2,})")] + [InlineData("(?>[^a]?)[^a]", "(?>[^a]{1,2})")] + [InlineData("(?>[^a]{1,3})[^a]", "(?>[^a]{2,4})")] + [InlineData("(?>[0-9]*)[0-9]", "(?>[0-9]+)")] + [InlineData("(?>[0-9]+)[0-9]", "(?>[0-9]{2,})")] + [InlineData("(?>[0-9]?)[0-9]", "(?>[0-9]{1,2})")] + [InlineData("(?>[0-9]{1,3})[0-9]", "(?>[0-9]{2,4})")] [InlineData("a{2147483646}a", "a{2147483647}")] [InlineData("a{2147483647}a", "a{2147483647}")] [InlineData("a{0,2147483646}a", "a{0,2147483647}")] @@ -470,6 +472,10 @@ public void PatternsReduceIdentically(string actual, string expected) [InlineData("(?:ab??){2}", "(?:a(?>b??)){2}")] [InlineData("(?:ab??){2, 3}", "(?:a(?>b??)){2, 3}")] [InlineData("ab??(b)", "a(?>b??)(b)")] + [InlineData(@"\w+\b\w+", @"(?>\w+)\b\w")] + [InlineData(@"\w*\b\w+", @"(?>\w*)\b\w+")] + [InlineData(@"\W+\B\W+", @"(?>\W+)\B\W")] + [InlineData(@"\W*\B\W+", @"(?>\W*)\B\W")] // Loops inside alternation constructs [InlineData("(abc*|def)chi", "(ab(?>c*)|def)chi")] [InlineData("(abc|def*)fhi", "(abc|de(?>f*))fhi")] @@ -505,6 +511,9 @@ public void PatternsReduceDifferently(string actual, string expected) [InlineData(@"a??", RegexOptions.None, 0, 1)] [InlineData(@"a+", RegexOptions.None, 1, null)] [InlineData(@"a+?", RegexOptions.None, 1, null)] + [InlineData(@"(?>a*)a", RegexOptions.None, 1, null)] + [InlineData(@"(?>a*)a+", RegexOptions.None, 1, null)] + [InlineData(@"(?>a*)a*", RegexOptions.None, 0, null)] [InlineData(@"a{2}", RegexOptions.None, 2, 2)] [InlineData(@"a{2}?", RegexOptions.None, 2, 2)] [InlineData(@"a{3,17}", RegexOptions.None, 3, 17)]