@@ -8,6 +8,7 @@ package bytes
88
99import (
1010 "internal/bytealg"
11+ "iter"
1112 "unicode"
1213 "unicode/utf8"
1314)
@@ -50,6 +51,20 @@ func explode(s []byte, n int) [][]byte {
5051 return a [0 :na ]
5152}
5253
54+ // explodeSeq returns an iterator over the runes in s.
55+ func explodeSeq (s []byte ) iter.Seq [[]byte ] {
56+ return func (yield func ([]byte ) bool ) {
57+ s := s
58+ for len (s ) > 0 {
59+ _ , size := utf8 .DecodeRune (s )
60+ if ! yield (s [:size ]) {
61+ return
62+ }
63+ s = s [size :]
64+ }
65+ }
66+ }
67+
5368// Count counts the number of non-overlapping instances of sep in s.
5469// If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
5570func Count (s , sep []byte ) int {
@@ -318,6 +333,28 @@ func LastIndexAny(s []byte, chars string) int {
318333 return - 1
319334}
320335
336+ // Lines returns an iterator over the newline-terminated lines in the byte slice s.
337+ // The lines yielded by the iterator include their terminating newlines.
338+ // If s is empty, the iterator yields no lines at all.
339+ // If s does not end in a newline, the final yielded line will not end in a newline.
340+ func Lines (s []byte ) iter.Seq [[]byte ] {
341+ return func (yield func ([]byte ) bool ) {
342+ s := s
343+ for len (s ) > 0 {
344+ var line []byte
345+ if i := IndexByte (s , '\n' ); i >= 0 {
346+ line , s = s [:i + 1 ], s [i + 1 :]
347+ } else {
348+ line , s = s , nil
349+ }
350+ if ! yield (line ) {
351+ return
352+ }
353+ }
354+ return
355+ }
356+ }
357+
321358// Generic split: splits after each instance of sep,
322359// including sepSave bytes of sep in the subslices.
323360func genSplit (s , sep []byte , sepSave , n int ) [][]byte {
@@ -350,6 +387,29 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte {
350387 return a [:i + 1 ]
351388}
352389
390+ // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
391+ // bytes of sep to include in the results (none or all).
392+ func splitSeq (s , sep []byte , sepSave int ) iter.Seq [[]byte ] {
393+ if len (sep ) == 0 {
394+ return explodeSeq (s )
395+ }
396+ return func (yield func ([]byte ) bool ) {
397+ s := s
398+ for {
399+ i := Index (s , sep )
400+ if i < 0 {
401+ break
402+ }
403+ frag := s [:i + sepSave ]
404+ if ! yield (frag ) {
405+ return
406+ }
407+ s = s [i + len (sep ):]
408+ }
409+ yield (s )
410+ }
411+ }
412+
353413// SplitN slices s into subslices separated by sep and returns a slice of
354414// the subslices between those separators.
355415// If sep is empty, SplitN splits after each UTF-8 sequence.
@@ -390,6 +450,20 @@ func SplitAfter(s, sep []byte) [][]byte {
390450 return genSplit (s , sep , len (sep ), - 1 )
391451}
392452
453+ // SplitSeq returns an iterator over all subslices of s separated by sep.
454+ // The iterator yields the same subslices that would be returned by Split(s, sep),
455+ // but without constructing the slice; sep itself is not included (sepSave is 0).
456+ func SplitSeq (s , sep []byte ) iter.Seq [[]byte ] {
457+ return splitSeq (s , sep , 0 )
458+ }
459+
460+ // SplitAfterSeq returns an iterator over subslices of s split after each instance of sep.
461+ // The iterator yields the same subslices that would be returned by SplitAfter(s, sep),
462+ // but without constructing the slice; each sep is kept whole (sepSave is len(sep)).
463+ func SplitAfterSeq (s , sep []byte ) iter.Seq [[]byte ] {
464+ return splitSeq (s , sep , len (sep ))
465+ }
466+
393467var asciiSpace = [256 ]uint8 {'\t' : 1 , '\n' : 1 , '\v' : 1 , '\f' : 1 , '\r' : 1 , ' ' : 1 }
394468
395469// Fields interprets s as a sequence of UTF-8-encoded code points.
@@ -446,6 +520,40 @@ func Fields(s []byte) [][]byte {
446520 return a
447521}
448522
523+ // FieldsSeq returns an iterator over substrings of s split around runs of
524+ // whitespace characters, as defined by unicode.IsSpace.
525+ // The iterator yields the same strings that would be returned by Fields(s),
526+ // but without constructing the slice.
527+ func FieldsSeq (s []byte ) iter.Seq [[]byte ] {
528+ return func (yield func ([]byte ) bool ) {
529+ s := s
530+ start := - 1
531+ for i := 0 ; i < len (s ); {
532+ size := 1
533+ r := rune (s [i ])
534+ isSpace := asciiSpace [s [i ]] != 0
535+ if r >= utf8 .RuneSelf {
536+ r , size = utf8 .DecodeRune (s [i :])
537+ isSpace = unicode .IsSpace (r )
538+ }
539+ if isSpace {
540+ if start >= 0 {
541+ if ! yield (s [start :i ]) {
542+ return
543+ }
544+ start = - 1
545+ }
546+ } else if start < 0 {
547+ start = i
548+ }
549+ i += size
550+ }
551+ if start >= 0 {
552+ yield (s [start :])
553+ }
554+ }
555+ }
556+
449557// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
450558// It splits the slice s at each run of code points c satisfying f(c) and
451559// returns a slice of subslices of s. If all code points in s satisfy f(c), or
@@ -500,6 +608,38 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
500608 return a
501609}
502610
611+ // FieldsFuncSeq returns an iterator over substrings of s split around runs of
612+ // Unicode code points satisfying f(c).
613+ // The iterator yields the same strings that would be returned by FieldsFunc(s),
614+ // but without constructing the slice.
615+ func FieldsFuncSeq (s []byte , f func (rune ) bool ) iter.Seq [[]byte ] {
616+ return func (yield func ([]byte ) bool ) {
617+ s := s
618+ start := - 1
619+ for i := 0 ; i < len (s ); {
620+ size := 1
621+ r := rune (s [i ])
622+ if r >= utf8 .RuneSelf {
623+ r , size = utf8 .DecodeRune (s [i :])
624+ }
625+ if f (r ) {
626+ if start >= 0 {
627+ if ! yield (s [start :i ]) {
628+ return
629+ }
630+ start = - 1
631+ }
632+ } else if start < 0 {
633+ start = i
634+ }
635+ i += size
636+ }
637+ if start >= 0 {
638+ yield (s [start :])
639+ }
640+ }
641+ }
642+
503643// Join concatenates the elements of s to create a new byte slice. The separator
504644// sep is placed between elements in the resulting slice.
505645func Join (s [][]byte , sep []byte ) []byte {
0 commit comments