Skip to content

Commit da12812

Browse files
francisco3ferrazgopherbot
authored andcommitted
go/analysis/passes/modernize: fix stringscut false positive for unguarded afterSlice
After i = strings.Index(s, ":"), with a single-byte separator, it is common to use s[i+1:], regardless of the sign of i, as a safe way to express "the part after the colon, or the whole string if missing". However, the modernizer transformation to: before, after, _ = strings.Cut(s, ":") ... use(after) is not sound when i < 0 case because Cut returns after == "" in this case. The solution is to apply the transformation only when the s[i+1:] expression is dominated by a check that i is non-negative. Fixes golang/go#77737 Change-Id: Iaf0238e1fbcaf70ba3b4afe571474cfbb467b3e4 Reviewed-on: https://go-review.googlesource.com/c/tools/+/747900 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Madeline Kalil <mkalil@google.com> Reviewed-by: Alan Donovan <adonovan@google.com> Auto-Submit: Alan Donovan <adonovan@google.com>
1 parent 7e46809 commit da12812

File tree

3 files changed

+297
-71
lines changed

3 files changed

+297
-71
lines changed

go/analysis/passes/modernize/stringscut.go

Lines changed: 128 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ func stringscut(pass *analysis.Pass) (any, error) {
183183
// len(substr)]), then we can replace the call to Index()
184184
// with a call to Cut() and use the returned ok, before,
185185
// and after variables accordingly.
186-
negative, nonnegative, beforeSlice, afterSlice := checkIdxUses(pass.TypesInfo, index.Uses(iObj), s, substr)
186+
negative, nonnegative, beforeSlice, afterSlice := checkIdxUses(pass.TypesInfo, index.Uses(iObj), s, substr, iObj)
187187

188188
// Either there are no uses of before, after, or ok, or some use
189189
// of i does not match our criteria - don't suggest a fix.
@@ -374,14 +374,31 @@ func indexArgValid(info *types.Info, index *typeindex.Index, expr ast.Expr, afte
374374
// 2. nonnegative - a condition equivalent to i >= 0
375375
// 3. beforeSlice - a slice of `s` that matches either s[:i], s[0:i]
376376
// 4. afterSlice - a slice of `s` that matches one of: s[i+len(substr):], s[len(substr) + i:], s[i + const], s[k + i] (where k = len(substr))
377-
func checkIdxUses(info *types.Info, uses iter.Seq[inspector.Cursor], s, substr ast.Expr) (negative, nonnegative, beforeSlice, afterSlice []ast.Expr) {
377+
//
378+
// Additionally, all beforeSlice and afterSlice uses must be dominated by a
379+
// nonnegative guard on i (i.e., inside the body of an if whose condition
380+
// checks i >= 0, or in the else of a negative check, or after an
381+
// early-return negative check). This ensures that the rewrite from
382+
// s[i+len(sep):] to "after" preserves semantics, since when i == -1,
383+
// s[i+len(sep):] may yield a valid substring (e.g. s[0:] for single-byte
384+
// separators), but "after" would be "".
385+
//
386+
// When len(substr)==1, it's safe to use s[i+1:] even when i < 0.
387+
// Otherwise, each replacement of s[i+1:] must be guarded by a check
388+
// that i is nonnegative.
389+
func checkIdxUses(info *types.Info, uses iter.Seq[inspector.Cursor], s, substr ast.Expr, iObj types.Object) (negative, nonnegative, beforeSlice, afterSlice []ast.Expr) {
390+
requireGuard := true
391+
if l := constSubstrLen(info, substr); l != -1 && l != 1 {
392+
requireGuard = false
393+
}
394+
378395
use := func(cur inspector.Cursor) bool {
379396
ek := cur.ParentEdgeKind()
380397
n := cur.Parent().Node()
381398
switch ek {
382399
case edge.BinaryExpr_X, edge.BinaryExpr_Y:
383400
check := n.(*ast.BinaryExpr)
384-
switch checkIdxComparison(info, check) {
401+
switch checkIdxComparison(info, check, iObj) {
385402
case -1:
386403
negative = append(negative, check)
387404
return true
@@ -397,10 +414,10 @@ func checkIdxUses(info *types.Info, uses iter.Seq[inspector.Cursor], s, substr a
397414
if slice, ok := cur.Parent().Parent().Node().(*ast.SliceExpr); ok &&
398415
sameObject(info, s, slice.X) &&
399416
slice.Max == nil {
400-
if isBeforeSlice(info, ek, slice) {
417+
if isBeforeSlice(info, ek, slice) && (!requireGuard || isSliceIndexGuarded(info, cur, iObj)) {
401418
beforeSlice = append(beforeSlice, slice)
402419
return true
403-
} else if isAfterSlice(info, ek, slice, substr) {
420+
} else if isAfterSlice(info, ek, slice, substr) && (!requireGuard || isSliceIndexGuarded(info, cur, iObj)) {
404421
afterSlice = append(afterSlice, slice)
405422
return true
406423
}
@@ -410,10 +427,10 @@ func checkIdxUses(info *types.Info, uses iter.Seq[inspector.Cursor], s, substr a
410427
// Check that the thing being sliced is s and that the slice doesn't
411428
// have a max index.
412429
if sameObject(info, s, slice.X) && slice.Max == nil {
413-
if isBeforeSlice(info, ek, slice) {
430+
if isBeforeSlice(info, ek, slice) && (!requireGuard || isSliceIndexGuarded(info, cur, iObj)) {
414431
beforeSlice = append(beforeSlice, slice)
415432
return true
416-
} else if isAfterSlice(info, ek, slice, substr) {
433+
} else if isAfterSlice(info, ek, slice, substr) && (!requireGuard || isSliceIndexGuarded(info, cur, iObj)) {
417434
afterSlice = append(afterSlice, slice)
418435
return true
419436
}
@@ -465,8 +482,15 @@ func hasModifyingUses(info *types.Info, uses iter.Seq[inspector.Cursor], afterPo
465482
// Since strings.Index returns exactly -1 if the substring is not found, we
466483
// don't need to handle expressions like i <= -3.
467484
// We return 0 if the expression does not match any of these options.
468-
// We assume that a check passed to checkIdxComparison has i as one of its operands.
469-
func checkIdxComparison(info *types.Info, check *ast.BinaryExpr) int {
485+
func checkIdxComparison(info *types.Info, check *ast.BinaryExpr, iObj types.Object) int {
486+
isI := func(e ast.Expr) bool {
487+
id, ok := e.(*ast.Ident)
488+
return ok && info.Uses[id] == iObj
489+
}
490+
if !isI(check.X) && !isI(check.Y) {
491+
return 0
492+
}
493+
470494
// Ensure that the constant (if any) is on the right.
471495
x, op, y := check.X, check.Op, check.Y
472496
if info.Types[x].Value != nil {
@@ -515,44 +539,49 @@ func isBeforeSlice(info *types.Info, ek edge.Kind, slice *ast.SliceExpr) bool {
515539
return ek == edge.SliceExpr_High && (slice.Low == nil || isZeroIntConst(info, slice.Low))
516540
}
517541

518-
// isAfterSlice reports whether the SliceExpr is of the form s[i+len(substr):],
519-
// or s[i + k:] where k is a const is equal to len(substr).
520-
func isAfterSlice(info *types.Info, ek edge.Kind, slice *ast.SliceExpr, substr ast.Expr) bool {
521-
lowExpr, ok := slice.Low.(*ast.BinaryExpr)
522-
if !ok || slice.High != nil {
523-
return false
524-
}
525-
// Returns true if the expression is a call to len(substr).
526-
isLenCall := func(expr ast.Expr) bool {
527-
call, ok := expr.(*ast.CallExpr)
528-
if !ok || len(call.Args) != 1 {
529-
return false
530-
}
531-
return sameObject(info, substr, call.Args[0]) && typeutil.Callee(info, call) == builtinLen
532-
}
533-
542+
// constSubstrLen returns the constant length of substr, or -1 if unknown.
543+
func constSubstrLen(info *types.Info, substr ast.Expr) int {
534544
// Handle len([]byte(substr))
535-
if is[*ast.CallExpr](substr) {
536-
call := substr.(*ast.CallExpr)
545+
if call, ok := substr.(*ast.CallExpr); ok {
537546
tv := info.Types[call.Fun]
538547
if tv.IsType() && types.Identical(tv.Type, byteSliceType) {
539548
// Only one arg in []byte conversion.
540549
substr = call.Args[0]
541550
}
542551
}
543-
substrLen := -1
544552
substrVal := info.Types[substr].Value
545553
if substrVal != nil {
546554
switch substrVal.Kind() {
547555
case constant.String:
548-
substrLen = len(constant.StringVal(substrVal))
556+
return len(constant.StringVal(substrVal))
549557
case constant.Int:
550558
// constant.Value is a byte literal, e.g. bytes.IndexByte(_, 'a')
551559
// or a numeric byte literal, e.g. bytes.IndexByte(_, 65)
552-
substrLen = 1
560+
// ([]byte(rune) is not legal.)
561+
return 1
562+
}
563+
}
564+
return -1
565+
}
566+
567+
// isAfterSlice reports whether the SliceExpr is of the form s[i+len(substr):],
568+
// or s[i + k:] where k is a const is equal to len(substr).
569+
func isAfterSlice(info *types.Info, ek edge.Kind, slice *ast.SliceExpr, substr ast.Expr) bool {
570+
lowExpr, ok := slice.Low.(*ast.BinaryExpr)
571+
if !ok || slice.High != nil {
572+
return false
573+
}
574+
// Returns true if the expression is a call to len(substr).
575+
isLenCall := func(expr ast.Expr) bool {
576+
call, ok := expr.(*ast.CallExpr)
577+
if !ok || len(call.Args) != 1 {
578+
return false
553579
}
580+
return sameObject(info, substr, call.Args[0]) && typeutil.Callee(info, call) == builtinLen
554581
}
555582

583+
substrLen := constSubstrLen(info, substr)
584+
556585
switch ek {
557586
case edge.BinaryExpr_X:
558587
kVal := info.Types[lowExpr.Y].Value
@@ -578,6 +607,75 @@ func isAfterSlice(info *types.Info, ek edge.Kind, slice *ast.SliceExpr, substr a
578607
return false
579608
}
580609

610+
// isSliceIndexGuarded reports whether a use of the index variable i (at the given cursor)
611+
// inside a slice expression is dominated by a nonnegative guard.
612+
// A use is considered guarded if any of the following are true:
613+
// - It is inside the Body of an IfStmt whose condition is a nonnegative check on i.
614+
// - It is inside the Else of an IfStmt whose condition is a negative check on i.
615+
// - It is preceded (in the same block) by an IfStmt whose condition is a
616+
// negative check on i with a terminating body (e.g., early return).
617+
//
618+
// Conversely, a use is immediately rejected if:
619+
// - It is inside the Body of an IfStmt whose condition is a negative check on i.
620+
// - It is inside the Else of an IfStmt whose condition is a nonnegative check on i.
621+
//
622+
// We have already checked (see [hasModifyingUses]) that there are no
623+
// intervening uses (incl. via aliases) of i that might alter its value.
624+
func isSliceIndexGuarded(info *types.Info, cur inspector.Cursor, iObj types.Object) bool {
625+
for anc := range cur.Enclosing() {
626+
switch anc.ParentEdgeKind() {
627+
case edge.IfStmt_Body, edge.IfStmt_Else:
628+
ifStmt := anc.Parent().Node().(*ast.IfStmt)
629+
check := condChecksIdx(info, ifStmt.Cond, iObj)
630+
if anc.ParentEdgeKind() == edge.IfStmt_Else {
631+
check = -check
632+
}
633+
if check > 0 {
634+
return true // inside nonnegative-guarded block (i >= 0 here)
635+
}
636+
if check < 0 {
637+
return false // inside negative-guarded block (i < 0 here)
638+
}
639+
case edge.BlockStmt_List:
640+
// Check preceding siblings for early-return negative checks.
641+
for sib, ok := anc.PrevSibling(); ok; sib, ok = sib.PrevSibling() {
642+
ifStmt, ok := sib.Node().(*ast.IfStmt)
643+
if ok && condChecksIdx(info, ifStmt.Cond, iObj) < 0 && bodyTerminates(ifStmt.Body) {
644+
return true // preceded by early-return negative check
645+
}
646+
}
647+
case edge.FuncDecl_Body, edge.FuncLit_Body:
648+
return false // stop at function boundary
649+
}
650+
}
651+
return false
652+
}
653+
654+
// condChecksIdx reports whether cond is a BinaryExpr that checks
655+
// the index variable iObj for negativity or non-negativity.
656+
// Returns -1 for negative (e.g. i < 0), +1 for nonnegative (e.g. i >= 0), 0 otherwise.
657+
func condChecksIdx(info *types.Info, cond ast.Expr, iObj types.Object) int {
658+
binExpr, ok := cond.(*ast.BinaryExpr)
659+
if !ok {
660+
return 0
661+
}
662+
return checkIdxComparison(info, binExpr, iObj)
663+
}
664+
665+
// bodyTerminates reports whether the given block statement unconditionally
666+
// terminates execution (via return, break, continue, or goto).
667+
func bodyTerminates(block *ast.BlockStmt) bool {
668+
if len(block.List) == 0 {
669+
return false
670+
}
671+
last := block.List[len(block.List)-1]
672+
switch last.(type) {
673+
case *ast.ReturnStmt, *ast.BranchStmt:
674+
return true // return, break, continue, goto
675+
}
676+
return false
677+
}
678+
581679
// sameObject reports whether we know that the expressions resolve to the same object.
582680
func sameObject(info *types.Info, expr1, expr2 ast.Expr) bool {
583681
if ident1, ok := expr1.(*ast.Ident); ok {

go/analysis/passes/modernize/testdata/src/stringscut/stringscut.go

Lines changed: 77 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ import (
88
func basic(s string) bool {
99
s = "reassigned"
1010
i := strings.Index(s, "=") // want "strings.Index can be simplified using strings.Cut"
11-
print(s[:i])
11+
if i >= 0 {
12+
print(s[:i])
13+
}
1214
return i >= 0
1315
}
1416

@@ -50,7 +52,9 @@ func skip_var_decl(s string) bool {
5052

5153
func basic_substr_arg(s string, substr string) bool {
5254
i := strings.Index(s, substr) // want "strings.Index can be simplified using strings.Cut"
53-
print(s[i+len(substr):])
55+
if i >= 0 {
56+
print(s[i+len(substr):])
57+
}
5458
return i >= 0
5559
}
5660

@@ -62,20 +66,26 @@ func wrong_len_arg(s string, substr string) bool {
6266

6367
func basic_strings_byte(s string) bool {
6468
i := strings.IndexByte(s, '+') // want "strings.IndexByte can be simplified using strings.Cut"
65-
print(s[:i])
69+
if i >= 0 {
70+
print(s[:i])
71+
}
6672
return i >= 0
6773
}
6874

6975
func basic_strings_byte_int(s string) bool {
7076
i := strings.IndexByte(s, 55) // want "strings.IndexByte can be simplified using strings.Cut"
71-
print(s[:i])
77+
if i >= 0 {
78+
print(s[:i])
79+
}
7280
return i >= 0
7381
}
7482

7583
func basic_strings_byte_var(s string) bool {
7684
b := byte('b')
7785
i := strings.IndexByte(s, b) // want "strings.IndexByte can be simplified using strings.Cut"
78-
print(s[:i])
86+
if i >= 0 {
87+
print(s[:i])
88+
}
7989
return i >= 0
8090
}
8191

@@ -89,7 +99,7 @@ func basic_bytes(b []byte) []byte {
8999
}
90100

91101
func basic_index_bytes(b []byte) string {
92-
i := bytes.IndexByte(b, 's') // want "bytes.IndexByte can be simplified using bytes.Cut"
102+
i := bytes.IndexByte(b, 's') // don't modernize: b[i+1:] in else is not guarded
93103
if i >= 0 {
94104
return string(b[:i])
95105
} else {
@@ -99,14 +109,20 @@ func basic_index_bytes(b []byte) string {
99109

100110
func const_substr_len(s string) bool {
101111
i := strings.Index(s, "=") // want "strings.Index can be simplified using strings.Cut"
102-
r := s[i+len("="):]
103-
return len(r) > 0
112+
if i >= 0 {
113+
r := s[i+len("="):]
114+
return len(r) > 0
115+
}
116+
return false
104117
}
105118

106119
func const_for_len(s string) bool {
107120
i := strings.Index(s, "=") // want "strings.Index can be simplified using strings.Cut"
108-
r := s[i+1:]
109-
return len(r) > 0
121+
if i >= 0 {
122+
r := s[i+1:]
123+
return len(r) > 0
124+
}
125+
return false
110126
}
111127

112128
func index(s string) bool {
@@ -154,7 +170,7 @@ func invalid_slice(s string) string {
154170

155171
func index_and_before_after(s string) string {
156172
substr := "="
157-
i := strings.Index(s, substr) // want "strings.Index can be simplified using strings.Cut"
173+
i := strings.Index(s, substr) // don't modernize: s[i+len(substr):] and s[len(substr)+i:] not nonneg-guarded
158174
if i == -1 {
159175
print("test")
160176
}
@@ -174,7 +190,7 @@ func index_and_before_after(s string) string {
174190
}
175191

176192
func idx_var_init(s string) (string, string) {
177-
var idx = strings.Index(s, "=") // want "strings.Index can be simplified using strings.Cut"
193+
var idx = strings.Index(s, "=") // don't modernize: s[0:idx] is not guarded
178194
return s[0:idx], s
179195
}
180196

@@ -219,7 +235,7 @@ func s_modified_no_params() string {
219235
func s_in_func_call() string {
220236
s := "string"
221237
substr := "substr"
222-
idx := strings.Index(s, substr) // want "strings.Index can be simplified using strings.Cut"
238+
idx := strings.Index(s, substr) // don't modernize: s[:idx] is not guarded
223239
function(s)
224240
return s[:idx]
225241
}
@@ -306,3 +322,51 @@ func reference_str(s *string) {}
306322
func reference_int(i *int) {}
307323

308324
func blank() {}
325+
326+
// Regression test for unguarded slice uses (https://go.dev/issue/77737).
327+
// The s[colon+1:] usage outside the "if" relies on -1+1=0 to return the full
328+
// string when the separator is absent. Rewriting to "after" would return "".
329+
func unguarded_after_slice(s string) (int, string) {
330+
colon := strings.Index(s, ":")
331+
if colon != -1 {
332+
print(s[:colon])
333+
}
334+
return colon, s[colon+1:] // don't modernize: s[colon+1:] not guarded
335+
}
336+
337+
// Same as above but with the guard using i < 0.
338+
func unguarded_after_slice_negcheck(s string) string {
339+
i := strings.Index(s, ":")
340+
if i < 0 {
341+
print("not found")
342+
}
343+
return s[i+1:] // don't modernize: s[i+1:] not guarded
344+
}
345+
346+
// Safe: both slice uses are inside the nonneg guard.
347+
func guarded_both_slices(s string) (string, string) {
348+
i := strings.Index(s, ":") // want "strings.Index can be simplified using strings.Cut"
349+
if i >= 0 {
350+
return s[:i], s[i+1:]
351+
}
352+
return "", s
353+
}
354+
355+
// Safe: slice uses after early-return negative check.
356+
func guarded_early_return(s string) (string, string) {
357+
i := strings.Index(s, ":") // want "strings.Index can be simplified using strings.Cut"
358+
if i < 0 {
359+
return "", s
360+
}
361+
return s[:i], s[i+1:]
362+
}
363+
364+
// Safe: slice uses in else of negative check.
365+
func guarded_neg_else(s string) (string, string) {
366+
i := strings.Index(s, ":") // want "strings.Index can be simplified using strings.Cut"
367+
if i == -1 {
368+
return "", s
369+
} else {
370+
return s[:i], s[i+1:]
371+
}
372+
}

0 commit comments

Comments
 (0)