Skip to content

Commit ee61186

Browse files
mauri870 authored and gopherbot committed
regexp/syntax: accept (?<name>...) syntax as valid capture
Currently the only named capture supported by regexp is (?P<name>a).

The syntax (?<name>a) is also widely used and there is currently an effort from the Rust regex and RE2 teams to also accept this syntax.

Fixes #58458

Change-Id: If22d44d3a5c4e8133ec68238ab130c151ca7c5c5
GitHub-Last-Rev: 31b50e6
GitHub-Pull-Request: #61624
Reviewed-on: https://go-review.googlesource.com/c/go/+/513838
Auto-Submit: Ian Lance Taylor <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: David Chase <[email protected]>
Run-TryBot: Ian Lance Taylor <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 977e23a commit ee61186

File tree

3 files changed

+21
-5
lines changed

3 files changed

+21
-5
lines changed

src/regexp/syntax/doc.go

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ Grouping:
5656
5757
(re) numbered capturing group (submatch)
5858
(?P<name>re) named & numbered capturing group (submatch)
59+
(?<name>re) named & numbered capturing group (submatch)
5960
(?:re) non-capturing group
6061
(?flags) set flags within current group; non-capturing
6162
(?flags:re) set flags during re; non-capturing

src/regexp/syntax/parse.go

+14-5
Original file line numberDiff line numberDiff line change
@@ -1159,9 +1159,18 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
11591159
// support all three as well. EcmaScript 4 uses only the Python form.
11601160
//
11611161
// In both the open source world (via Code Search) and the
1162-
// Google source tree, (?P<expr>name) is the dominant form,
1163-
// so that's the one we implement. One is enough.
1164-
if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
1162+
// Google source tree, (?P<name>expr) and (?<name>expr) are the
1163+
// dominant forms of named captures and both are supported.
1164+
startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<'
1165+
startsWithName := len(t) > 3 && t[2] == '<'
1166+
1167+
if startsWithP || startsWithName {
1168+
// index in t where the capture name starts
1169+
exprStartPos := 4
1170+
if startsWithName {
1171+
exprStartPos = 3
1172+
}
1173+
11651174
// Pull out name.
11661175
end := strings.IndexRune(t, '>')
11671176
if end < 0 {
@@ -1171,8 +1180,8 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
11711180
return "", &Error{ErrInvalidNamedCapture, s}
11721181
}
11731182

1174-
capture := t[:end+1] // "(?P<name>"
1175-
name := t[4:end] // "name"
1183+
capture := t[:end+1] // "(?P<name>" or "(?<name>"
1184+
name := t[exprStartPos:end] // "name"
11761185
if err = checkUTF8(name); err != nil {
11771186
return "", err
11781187
}

src/regexp/syntax/parse_test.go

+6
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ var parseTests = []parseTest{
160160

161161
// Test named captures
162162
{`(?P<name>a)`, `cap{name:lit{a}}`},
163+
{`(?<name>a)`, `cap{name:lit{a}}`},
163164

164165
// Case-folded literals
165166
{`[Aa]`, `litfold{A}`},
@@ -482,6 +483,11 @@ var invalidRegexps = []string{
482483
`(?P<name`,
483484
`(?P<x y>a)`,
484485
`(?P<>a)`,
486+
`(?<name>a`,
487+
`(?<name>`,
488+
`(?<name`,
489+
`(?<x y>a)`,
490+
`(?<>a)`,
485491
`[a-Z]`,
486492
`(?i)[a-Z]`,
487493
`\Q\E*`,

0 commit comments

Comments
 (0)