Skip to content

Commit 0add9a4

Browse files
committed
encoding/csv: avoid mangling invalid UTF-8 in Writer
In the situation where a quoted field is necessary, avoid processing each UTF-8 rune one-by-one, which causes mangling of invalid sequences into utf8.RuneError, causing a loss of information. Instead, search only for the escaped characters, handle those specially and copy everything else in between verbatim.

This symmetrically matches the behavior of Reader.

Fixes #24298

Change-Id: I9276f64891084ce8487678f663fad711b4095dbb
Reviewed-on: https://go-review.googlesource.com/99297
Run-TryBot: Joe Tsai <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 88466e9 commit 0add9a4

File tree

2 files changed

+34
-19
lines changed

2 files changed

+34
-19
lines changed

src/encoding/csv/writer.go

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error {
5757
}
5858
continue
5959
}
60+
6061
if err := w.w.WriteByte('"'); err != nil {
6162
return err
6263
}
64+
for len(field) > 0 {
65+
// Search for special characters.
66+
i := strings.IndexAny(field, "\"\r\n")
67+
if i < 0 {
68+
i = len(field)
69+
}
70+
71+
// Copy verbatim everything before the special character.
72+
if _, err := w.w.WriteString(field[:i]); err != nil {
73+
return err
74+
}
75+
field = field[i:]
6376

64-
for _, r1 := range field {
65-
var err error
66-
switch r1 {
67-
case '"':
68-
_, err = w.w.WriteString(`""`)
69-
case '\r':
70-
if !w.UseCRLF {
71-
err = w.w.WriteByte('\r')
77+
// Encode the special character.
78+
if len(field) > 0 {
79+
var err error
80+
switch field[0] {
81+
case '"':
82+
_, err = w.w.WriteString(`""`)
83+
case '\r':
84+
if !w.UseCRLF {
85+
err = w.w.WriteByte('\r')
86+
}
87+
case '\n':
88+
if w.UseCRLF {
89+
_, err = w.w.WriteString("\r\n")
90+
} else {
91+
err = w.w.WriteByte('\n')
92+
}
7293
}
73-
case '\n':
74-
if w.UseCRLF {
75-
_, err = w.w.WriteString("\r\n")
76-
} else {
77-
err = w.w.WriteByte('\n')
94+
field = field[1:]
95+
if err != nil {
96+
return err
7897
}
79-
default:
80-
_, err = w.w.WriteRune(r1)
81-
}
82-
if err != nil {
83-
return err
8498
}
8599
}
86-
87100
if err := w.w.WriteByte('"'); err != nil {
88101
return err
89102
}

src/encoding/csv/writer_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ var writeTests = []struct {
3939
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
4040
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
4141
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
42+
{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
43+
{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
4244
}
4345

4446
func TestWrite(t *testing.T) {

0 commit comments

Comments
 (0)