@@ -8,6 +8,7 @@ package bytes
8
8
9
9
import (
10
10
"internal/bytealg"
11
+ "iter"
11
12
"unicode"
12
13
"unicode/utf8"
13
14
)
@@ -50,6 +51,20 @@ func explode(s []byte, n int) [][]byte {
50
51
return a [0 :na ]
51
52
}
52
53
54
+ // explodeSeq returns an iterator over the runes in s.
55
+ func explodeSeq (s []byte ) iter.Seq [[]byte ] {
56
+ return func (yield func ([]byte ) bool ) {
57
+ s := s
58
+ for len (s ) > 0 {
59
+ _ , size := utf8 .DecodeRune (s )
60
+ if ! yield (s [:size ]) {
61
+ return
62
+ }
63
+ s = s [size :]
64
+ }
65
+ }
66
+ }
67
+
53
68
// Count counts the number of non-overlapping instances of sep in s.
54
69
// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
55
70
func Count (s , sep []byte ) int {
@@ -318,6 +333,28 @@ func LastIndexAny(s []byte, chars string) int {
318
333
return - 1
319
334
}
320
335
336
+ // Lines returns an iterator over the newline-terminated lines in the byte slice s.
337
+ // The lines yielded by the iterator include their terminating newlines.
338
+ // If s is empty, the iterator yields no lines at all.
339
+ // If s does not end in a newline, the final yielded line will not end in a newline.
340
+ func Lines (s []byte ) iter.Seq [[]byte ] {
341
+ return func (yield func ([]byte ) bool ) {
342
+ s := s
343
+ for len (s ) > 0 {
344
+ var line []byte
345
+ if i := IndexByte (s , '\n' ); i >= 0 {
346
+ line , s = s [:i + 1 ], s [i + 1 :]
347
+ } else {
348
+ line , s = s , nil
349
+ }
350
+ if ! yield (line ) {
351
+ return
352
+ }
353
+ }
354
+ return
355
+ }
356
+ }
357
+
321
358
// Generic split: splits after each instance of sep,
322
359
// including sepSave bytes of sep in the subslices.
323
360
func genSplit (s , sep []byte , sepSave , n int ) [][]byte {
@@ -350,6 +387,29 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte {
350
387
return a [:i + 1 ]
351
388
}
352
389
390
+ // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
391
+ // bytes of sep to include in the results (none or all).
392
+ func splitSeq (s , sep []byte , sepSave int ) iter.Seq [[]byte ] {
393
+ if len (sep ) == 0 {
394
+ return explodeSeq (s )
395
+ }
396
+ return func (yield func ([]byte ) bool ) {
397
+ s := s
398
+ for {
399
+ i := Index (s , sep )
400
+ if i < 0 {
401
+ break
402
+ }
403
+ frag := s [:i + sepSave ]
404
+ if ! yield (frag ) {
405
+ return
406
+ }
407
+ s = s [i + len (sep ):]
408
+ }
409
+ yield (s )
410
+ }
411
+ }
412
+
353
413
// SplitN slices s into subslices separated by sep and returns a slice of
354
414
// the subslices between those separators.
355
415
// If sep is empty, SplitN splits after each UTF-8 sequence.
@@ -390,6 +450,20 @@ func SplitAfter(s, sep []byte) [][]byte {
390
450
return genSplit (s , sep , len (sep ), - 1 )
391
451
}
392
452
453
+ // SplitSeq returns an iterator over all substrings of s separated by sep.
454
+ // The iterator yields the same strings that would be returned by Split(s, sep),
455
+ // but without constructing the slice.
456
+ func SplitSeq (s , sep []byte ) iter.Seq [[]byte ] {
457
+ return splitSeq (s , sep , 0 )
458
+ }
459
+
460
+ // SplitAfterSeq returns an iterator over substrings of s split after each instance of sep.
461
+ // The iterator yields the same strings that would be returned by SplitAfter(s, sep),
462
+ // but without constructing the slice.
463
+ func SplitAfterSeq (s , sep []byte ) iter.Seq [[]byte ] {
464
+ return splitSeq (s , sep , len (sep ))
465
+ }
466
+
393
467
var asciiSpace = [256 ]uint8 {'\t' : 1 , '\n' : 1 , '\v' : 1 , '\f' : 1 , '\r' : 1 , ' ' : 1 }
394
468
395
469
// Fields interprets s as a sequence of UTF-8-encoded code points.
@@ -446,6 +520,40 @@ func Fields(s []byte) [][]byte {
446
520
return a
447
521
}
448
522
523
+ // FieldsSeq returns an iterator over substrings of s split around runs of
524
+ // whitespace characters, as defined by unicode.IsSpace.
525
+ // The iterator yields the same strings that would be returned by Fields(s),
526
+ // but without constructing the slice.
527
+ func FieldsSeq (s []byte ) iter.Seq [[]byte ] {
528
+ return func (yield func ([]byte ) bool ) {
529
+ s := s
530
+ start := - 1
531
+ for i := 0 ; i < len (s ); {
532
+ size := 1
533
+ r := rune (s [i ])
534
+ isSpace := asciiSpace [s [i ]] != 0
535
+ if r >= utf8 .RuneSelf {
536
+ r , size = utf8 .DecodeRune (s [i :])
537
+ isSpace = unicode .IsSpace (r )
538
+ }
539
+ if isSpace {
540
+ if start >= 0 {
541
+ if ! yield (s [start :i ]) {
542
+ return
543
+ }
544
+ start = - 1
545
+ }
546
+ } else if start < 0 {
547
+ start = i
548
+ }
549
+ i += size
550
+ }
551
+ if start >= 0 {
552
+ yield (s [start :])
553
+ }
554
+ }
555
+ }
556
+
449
557
// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
450
558
// It splits the slice s at each run of code points c satisfying f(c) and
451
559
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
@@ -500,6 +608,38 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
500
608
return a
501
609
}
502
610
611
+ // FieldsFuncSeq returns an iterator over substrings of s split around runs of
612
+ // Unicode code points satisfying f(c).
613
+ // The iterator yields the same strings that would be returned by FieldsFunc(s),
614
+ // but without constructing the slice.
615
+ func FieldsFuncSeq (s []byte , f func (rune ) bool ) iter.Seq [[]byte ] {
616
+ return func (yield func ([]byte ) bool ) {
617
+ s := s
618
+ start := - 1
619
+ for i := 0 ; i < len (s ); {
620
+ size := 1
621
+ r := rune (s [i ])
622
+ if r >= utf8 .RuneSelf {
623
+ r , size = utf8 .DecodeRune (s [i :])
624
+ }
625
+ if f (r ) {
626
+ if start >= 0 {
627
+ if ! yield (s [start :i ]) {
628
+ return
629
+ }
630
+ start = - 1
631
+ }
632
+ } else if start < 0 {
633
+ start = i
634
+ }
635
+ i += size
636
+ }
637
+ if start >= 0 {
638
+ yield (s [start :])
639
+ }
640
+ }
641
+ }
642
+
503
643
// Join concatenates the elements of s to create a new byte slice. The separator
504
644
// sep is placed between elements in the resulting slice.
505
645
func Join (s [][]byte , sep []byte ) []byte {
0 commit comments