@@ -8,6 +8,7 @@ package bytes
8
8
9
9
import (
10
10
"internal/bytealg"
11
+ "iter"
11
12
"unicode"
12
13
"unicode/utf8"
13
14
_ "unsafe" // for linkname
@@ -319,6 +320,28 @@ func LastIndexAny(s []byte, chars string) int {
319
320
return - 1
320
321
}
321
322
323
+ // Lines returns an iterator over the newline-terminated lines in the byte slice s.
324
+ // The lines yielded by the iterator include their terminating newlines.
325
+ // If s is empty, the iterator yields no lines at all.
326
+ // If s does not end in a newline, the final yielded line will not end in a newline.
327
+ // It returns a single-use iterator.
328
+ func Lines (s []byte ) iter.Seq [[]byte ] {
329
+ return func (yield func ([]byte ) bool ) {
330
+ for len (s ) > 0 {
331
+ var line []byte
332
+ if i := IndexByte (s , '\n' ); i >= 0 {
333
+ line , s = s [:i + 1 ], s [i + 1 :]
334
+ } else {
335
+ line , s = s , nil
336
+ }
337
+ if ! yield (line ) {
338
+ return
339
+ }
340
+ }
341
+ return
342
+ }
343
+ }
344
+
322
345
// Generic split: splits after each instance of sep,
323
346
// including sepSave bytes of sep in the subslices.
324
347
func genSplit (s , sep []byte , sepSave , n int ) [][]byte {
@@ -389,6 +412,57 @@ func SplitAfter(s, sep []byte) [][]byte {
389
412
return genSplit (s , sep , len (sep ), - 1 )
390
413
}
391
414
415
+ // explodeSeq returns an iterator over the runes in s.
416
+ func explodeSeq (s []byte ) iter.Seq [[]byte ] {
417
+ return func (yield func ([]byte ) bool ) {
418
+ for len (s ) > 0 {
419
+ _ , size := utf8 .DecodeRune (s )
420
+ if ! yield (s [:size ]) {
421
+ return
422
+ }
423
+ s = s [size :]
424
+ }
425
+ }
426
+ }
427
+
428
+ // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
429
+ // bytes of sep to include in the results (none or all).
430
+ func splitSeq (s , sep []byte , sepSave int ) iter.Seq [[]byte ] {
431
+ if len (sep ) == 0 {
432
+ return explodeSeq (s )
433
+ }
434
+ return func (yield func ([]byte ) bool ) {
435
+ for {
436
+ i := Index (s , sep )
437
+ if i < 0 {
438
+ break
439
+ }
440
+ frag := s [:i + sepSave ]
441
+ if ! yield (frag ) {
442
+ return
443
+ }
444
+ s = s [i + len (sep ):]
445
+ }
446
+ yield (s )
447
+ }
448
+ }
449
+
450
+ // SplitSeq returns an iterator over all substrings of s separated by sep.
451
+ // The iterator yields the same strings that would be returned by Split(s, sep),
452
+ // but without constructing the slice.
453
+ // It returns a single-use iterator.
454
+ func SplitSeq (s , sep []byte ) iter.Seq [[]byte ] {
455
+ return splitSeq (s , sep , 0 )
456
+ }
457
+
458
+ // SplitAfterSeq returns an iterator over substrings of s split after each instance of sep.
459
+ // The iterator yields the same strings that would be returned by SplitAfter(s, sep),
460
+ // but without constructing the slice.
461
+ // It returns a single-use iterator.
462
+ func SplitAfterSeq (s , sep []byte ) iter.Seq [[]byte ] {
463
+ return splitSeq (s , sep , len (sep ))
464
+ }
465
+
392
466
// asciiSpace is a lookup table indexed by byte value: the entries for tab,
// newline, vertical tab, form feed, carriage return, and space are 1, and
// every other entry is 0.
var asciiSpace = [256 ]uint8 {'\t' : 1 , '\n' : 1 , '\v' : 1 , '\f' : 1 , '\r' : 1 , ' ' : 1 }
393
467
394
468
// Fields interprets s as a sequence of UTF-8-encoded code points.
@@ -445,6 +519,40 @@ func Fields(s []byte) [][]byte {
445
519
return a
446
520
}
447
521
522
+ // FieldsSeq returns an iterator over substrings of s split around runs of
523
+ // whitespace characters, as defined by unicode.IsSpace.
524
+ // The iterator yields the same strings that would be returned by Fields(s),
525
+ // but without constructing the slice.
526
+ func FieldsSeq (s []byte ) iter.Seq [[]byte ] {
527
+ return func (yield func ([]byte ) bool ) {
528
+ s := s
529
+ start := - 1
530
+ for i := 0 ; i < len (s ); {
531
+ size := 1
532
+ r := rune (s [i ])
533
+ isSpace := asciiSpace [s [i ]] != 0
534
+ if r >= utf8 .RuneSelf {
535
+ r , size = utf8 .DecodeRune (s [i :])
536
+ isSpace = unicode .IsSpace (r )
537
+ }
538
+ if isSpace {
539
+ if start >= 0 {
540
+ if ! yield (s [start :i ]) {
541
+ return
542
+ }
543
+ start = - 1
544
+ }
545
+ } else if start < 0 {
546
+ start = i
547
+ }
548
+ i += size
549
+ }
550
+ if start >= 0 {
551
+ yield (s [start :])
552
+ }
553
+ }
554
+ }
555
+
448
556
// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
449
557
// It splits the slice s at each run of code points c satisfying f(c) and
450
558
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
@@ -499,6 +607,38 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
499
607
return a
500
608
}
501
609
610
+ // FieldsFuncSeq returns an iterator over substrings of s split around runs of
611
+ // Unicode code points satisfying f(c).
612
+ // The iterator yields the same strings that would be returned by FieldsFunc(s),
613
+ // but without constructing the slice.
614
+ func FieldsFuncSeq (s []byte , f func (rune ) bool ) iter.Seq [[]byte ] {
615
+ return func (yield func ([]byte ) bool ) {
616
+ s := s
617
+ start := - 1
618
+ for i := 0 ; i < len (s ); {
619
+ size := 1
620
+ r := rune (s [i ])
621
+ if r >= utf8 .RuneSelf {
622
+ r , size = utf8 .DecodeRune (s [i :])
623
+ }
624
+ if f (r ) {
625
+ if start >= 0 {
626
+ if ! yield (s [start :i ]) {
627
+ return
628
+ }
629
+ start = - 1
630
+ }
631
+ } else if start < 0 {
632
+ start = i
633
+ }
634
+ i += size
635
+ }
636
+ if start >= 0 {
637
+ yield (s [start :])
638
+ }
639
+ }
640
+ }
641
+
502
642
// Join concatenates the elements of s to create a new byte slice. The separator
503
643
// sep is placed between elements in the resulting slice.
504
644
func Join (s [][]byte , sep []byte ) []byte {
0 commit comments