Skip to content

Commit 532c44e

Browse files
committed
bytes, strings: add Lines, SplitSeq, SplitAfterSeq, FieldsSeq, FieldsFuncSeq
Fixes #61901.
1 parent d8c7230 commit 532c44e

File tree

8 files changed

+422
-2
lines changed

8 files changed

+422
-2
lines changed

api/next/61901.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pkg bytes, func FieldsFuncSeq([]uint8, func(int32) bool) iter.Seq[[]uint8] #61901
2+
pkg bytes, func FieldsSeq([]uint8) iter.Seq[[]uint8] #61901
3+
pkg bytes, func Lines([]uint8) iter.Seq[[]uint8] #61901
4+
pkg bytes, func SplitAfterSeq([]uint8, []uint8) iter.Seq[[]uint8] #61901
5+
pkg bytes, func SplitSeq([]uint8, []uint8) iter.Seq[[]uint8] #61901
6+
pkg strings, func FieldsFuncSeq(string, func(int32) bool) iter.Seq[string] #61901
7+
pkg strings, func FieldsSeq(string) iter.Seq[string] #61901
8+
pkg strings, func Lines(string) iter.Seq[string] #61901
9+
pkg strings, func SplitAfterSeq(string, string) iter.Seq[string] #61901
10+
pkg strings, func SplitSeq(string, string) iter.Seq[string] #61901

doc/next/6-stdlib/3-iter.md

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
### Iterators
2+
3+
The new [iter] package provides the basic definitions for working with
4+
user-defined iterators.
5+
6+
The [bytes] package adds several functions that work with iterators:
7+
- [Lines](/pkg/bytes#Lines) returns an iterator over the
8+
newline-terminated lines in the byte slice s.
9+
- [SplitSeq](/pkg/bytes#SplitSeq) returns an iterator over
10+
all substrings of s separated by sep.
11+
- [SplitAfterSeq](/pkg/bytes#SplitAfterSeq) returns an iterator
12+
over substrings of s split after each instance of sep.
13+
- [FieldsSeq](/pkg/bytes#FieldsSeq) returns an iterator over
14+
substrings of s split around runs of whitespace characters,
15+
as defined by unicode.IsSpace.
16+
- [FieldsFuncSeq](/pkg/bytes#FieldsFuncSeq) returns an iterator
17+
over substrings of s split around runs of Unicode code points satisfying f(c).
18+
19+
The [strings] package adds several functions that work with iterators:
20+
- [Lines](/pkg/strings#Lines) returns an iterator over
21+
the newline-terminated lines in the string s.
22+
- [SplitSeq](/pkg/strings#SplitSeq) returns an iterator over
23+
all substrings of s separated by sep.
24+
- [SplitAfterSeq](/pkg/strings#SplitAfterSeq) returns an iterator
25+
over substrings of s split after each instance of sep.
26+
- [FieldsSeq](/pkg/strings#FieldsSeq) returns an iterator over
27+
substrings of s split around runs of whitespace characters,
28+
as defined by unicode.IsSpace.
29+
- [FieldsFuncSeq](/pkg/strings#FieldsFuncSeq) returns an iterator
30+
over substrings of s split around runs of Unicode code points satisfying f(c).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<!-- see ../../3-iter.md -->
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<!-- see ../../3-iter.md -->

src/bytes/bytes.go

+140
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package bytes
88

99
import (
1010
"internal/bytealg"
11+
"iter"
1112
"unicode"
1213
"unicode/utf8"
1314
_ "unsafe" // for linkname
@@ -319,6 +320,28 @@ func LastIndexAny(s []byte, chars string) int {
319320
return -1
320321
}
321322

323+
// Lines returns an iterator over the newline-terminated lines in the byte slice s.
324+
// The lines yielded by the iterator include their terminating newlines.
325+
// If s is empty, the iterator yields no lines at all.
326+
// If s does not end in a newline, the final yielded line will not end in a newline.
327+
// It returns a single-use iterator.
328+
func Lines(s []byte) iter.Seq[[]byte] {
329+
return func(yield func([]byte) bool) {
330+
for len(s) > 0 {
331+
var line []byte
332+
if i := IndexByte(s, '\n'); i >= 0 {
333+
line, s = s[:i+1], s[i+1:]
334+
} else {
335+
line, s = s, nil
336+
}
337+
if !yield(line) {
338+
return
339+
}
340+
}
341+
return
342+
}
343+
}
344+
322345
// Generic split: splits after each instance of sep,
323346
// including sepSave bytes of sep in the subslices.
324347
func genSplit(s, sep []byte, sepSave, n int) [][]byte {
@@ -389,6 +412,57 @@ func SplitAfter(s, sep []byte) [][]byte {
389412
return genSplit(s, sep, len(sep), -1)
390413
}
391414

415+
// explodeSeq returns an iterator over the runes in s.
416+
func explodeSeq(s []byte) iter.Seq[[]byte] {
417+
return func(yield func([]byte) bool) {
418+
for len(s) > 0 {
419+
_, size := utf8.DecodeRune(s)
420+
if !yield(s[:size]) {
421+
return
422+
}
423+
s = s[size:]
424+
}
425+
}
426+
}
427+
428+
// splitSeq is SplitSeq or SplitAfterSeq, configured by how many
429+
// bytes of sep to include in the results (none or all).
430+
func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
431+
if len(sep) == 0 {
432+
return explodeSeq(s)
433+
}
434+
return func(yield func([]byte) bool) {
435+
for {
436+
i := Index(s, sep)
437+
if i < 0 {
438+
break
439+
}
440+
frag := s[:i+sepSave]
441+
if !yield(frag) {
442+
return
443+
}
444+
s = s[i+len(sep):]
445+
}
446+
yield(s)
447+
}
448+
}
449+
450+
// SplitSeq returns an iterator over all substrings of s separated by sep.
451+
// The iterator yields the same strings that would be returned by Split(s, sep),
452+
// but without constructing the slice.
453+
// It returns a single-use iterator.
454+
func SplitSeq(s, sep []byte) iter.Seq[[]byte] {
455+
return splitSeq(s, sep, 0)
456+
}
457+
458+
// SplitAfterSeq returns an iterator over substrings of s split after each instance of sep.
459+
// The iterator yields the same strings that would be returned by SplitAfter(s, sep),
460+
// but without constructing the slice.
461+
// It returns a single-use iterator.
462+
func SplitAfterSeq(s, sep []byte) iter.Seq[[]byte] {
463+
return splitSeq(s, sep, len(sep))
464+
}
465+
392466
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
393467

394468
// Fields interprets s as a sequence of UTF-8-encoded code points.
@@ -445,6 +519,40 @@ func Fields(s []byte) [][]byte {
445519
return a
446520
}
447521

522+
// FieldsSeq returns an iterator over substrings of s split around runs of
523+
// whitespace characters, as defined by unicode.IsSpace.
524+
// The iterator yields the same strings that would be returned by Fields(s),
525+
// but without constructing the slice.
526+
func FieldsSeq(s []byte) iter.Seq[[]byte] {
527+
return func(yield func([]byte) bool) {
528+
s := s
529+
start := -1
530+
for i := 0; i < len(s); {
531+
size := 1
532+
r := rune(s[i])
533+
isSpace := asciiSpace[s[i]] != 0
534+
if r >= utf8.RuneSelf {
535+
r, size = utf8.DecodeRune(s[i:])
536+
isSpace = unicode.IsSpace(r)
537+
}
538+
if isSpace {
539+
if start >= 0 {
540+
if !yield(s[start:i]) {
541+
return
542+
}
543+
start = -1
544+
}
545+
} else if start < 0 {
546+
start = i
547+
}
548+
i += size
549+
}
550+
if start >= 0 {
551+
yield(s[start:])
552+
}
553+
}
554+
}
555+
448556
// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
449557
// It splits the slice s at each run of code points c satisfying f(c) and
450558
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
@@ -499,6 +607,38 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
499607
return a
500608
}
501609

610+
// FieldsFuncSeq returns an iterator over substrings of s split around runs of
611+
// Unicode code points satisfying f(c).
612+
// The iterator yields the same strings that would be returned by FieldsFunc(s),
613+
// but without constructing the slice.
614+
func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
615+
return func(yield func([]byte) bool) {
616+
s := s
617+
start := -1
618+
for i := 0; i < len(s); {
619+
size := 1
620+
r := rune(s[i])
621+
if r >= utf8.RuneSelf {
622+
r, size = utf8.DecodeRune(s[i:])
623+
}
624+
if f(r) {
625+
if start >= 0 {
626+
if !yield(s[start:i]) {
627+
return
628+
}
629+
start = -1
630+
}
631+
} else if start < 0 {
632+
start = i
633+
}
634+
i += size
635+
}
636+
if start >= 0 {
637+
yield(s[start:])
638+
}
639+
}
640+
}
641+
502642
// Join concatenates the elements of s to create a new byte slice. The separator
503643
// sep is placed between elements in the resulting slice.
504644
func Join(s [][]byte, sep []byte) []byte {

src/bytes/bytes_test.go

+51
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
. "bytes"
99
"fmt"
1010
"internal/testenv"
11+
"iter"
1112
"math"
1213
"math/rand"
1314
"reflect"
@@ -758,6 +759,22 @@ func BenchmarkCountSingle(b *testing.B) {
758759
})
759760
}
760761

762+
var LinesTest = []string{
763+
"abc\nabc\n",
764+
"abc\r\nabc",
765+
"abc\r\n",
766+
"abc\n",
767+
}
768+
769+
func TestLines(t *testing.T) {
770+
for _, s := range LinesTest {
771+
result := Join(slices.Collect(Lines([]byte(s))), []byte(""))
772+
if string(result) != s {
773+
t.Errorf(`Join(collect(Lines(%q)), "") = %q`, s, result)
774+
}
775+
}
776+
}
777+
761778
type SplitTest struct {
762779
s string
763780
sep string
@@ -801,6 +818,14 @@ func TestSplit(t *testing.T) {
801818
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
802819
continue
803820
}
821+
822+
if tt.n < 0 {
823+
result2 := sliceOfString(slices.Collect(SplitSeq([]byte(tt.s), []byte(tt.sep))))
824+
if !slices.Equal(result2, tt.a) {
825+
t.Errorf(`collect(SplitSeq(%q, %q)) = %v; want %v`, tt.s, tt.sep, result2, tt.a)
826+
}
827+
}
828+
804829
if tt.n == 0 || len(a) == 0 {
805830
continue
806831
}
@@ -860,6 +885,13 @@ func TestSplitAfter(t *testing.T) {
860885
continue
861886
}
862887

888+
if tt.n < 0 {
889+
result2 := sliceOfString(slices.Collect(SplitAfterSeq([]byte(tt.s), []byte(tt.sep))))
890+
if !slices.Equal(result2, tt.a) {
891+
t.Errorf(`collect(SplitAfterSeq(%q, %q)) = %v; want %v`, tt.s, tt.sep, result2, tt.a)
892+
}
893+
}
894+
863895
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
864896
t.Errorf("last appended result was %s; want %s", x, want)
865897
}
@@ -913,6 +945,11 @@ func TestFields(t *testing.T) {
913945
continue
914946
}
915947

948+
result2 := sliceOfString(collect(t, FieldsSeq([]byte(tt.s))))
949+
if !slices.Equal(result2, tt.a) {
950+
t.Errorf(`collect(FieldsSeq(%q)) = %v; want %v`, tt.s, result2, tt.a)
951+
}
952+
916953
if string(b) != tt.s {
917954
t.Errorf("slice changed to %s; want %s", string(b), tt.s)
918955
}
@@ -955,6 +992,11 @@ func TestFieldsFunc(t *testing.T) {
955992
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
956993
}
957994

995+
result2 := sliceOfString(collect(t, FieldsFuncSeq([]byte(tt.s), pred)))
996+
if !slices.Equal(result2, tt.a) {
997+
t.Errorf(`collect(FieldsFuncSeq(%q)) = %v; want %v`, tt.s, result2, tt.a)
998+
}
999+
9581000
if string(b) != tt.s {
9591001
t.Errorf("slice changed to %s; want %s", b, tt.s)
9601002
}
@@ -2255,3 +2297,12 @@ func TestClone(t *testing.T) {
22552297
}
22562298
}
22572299
}
2300+
2301+
func collect(t *testing.T, seq iter.Seq[[]byte]) [][]byte {
2302+
out := slices.Collect(seq)
2303+
out1 := slices.Collect(seq)
2304+
if !reflect.DeepEqual(out, out1) {
2305+
t.Fatalf("inconsistent seq:\n%s\n%s", out, out1)
2306+
}
2307+
return out
2308+
}

0 commit comments

Comments
 (0)