html/template: fix string iteration in replacement operations

dspezia · robpike · commit a1c1a763bc7c · 2015-05-19T22:45:50.000Z
In css, js, and html, the replacement operations are implemented by iterating on strings (rune by rune). The for/range statement is used. The length of the rune is required and added to the index to properly slice the string. This is potentially wrong because there is a discrepancy between the result of utf8.RuneLen and the increment of the index (set by the for/range statement). For invalid strings, utf8.RuneLen('\ufffd') == 3, while the index is incremented only by 1 byte. htmlReplacer triggers a panic at slicing time for some invalid strings. Use a more robust iteration mechanism based on utf8.DecodeRuneInString, and make sure the same pattern is used for all similar functions in this package. Fixes #10799 Change-Id: Ibad3857b2819435d9fa564f06fc2ca8774102841 Reviewed-on: https://go-review.googlesource.com/10105 Reviewed-by: Rob Pike <r@golang.org>
diff --git a/src/html/template/css.go b/src/html/template/css.go
@@ -157,56 +157,20 @@ func isCSSSpace(b byte) bool {
 func cssEscaper(args ...interface{}) string {
 	s, _ := stringify(args...)
 	var b bytes.Buffer
-	written := 0
-	for i, r := range s {
+	r, w, written := rune(0), 0, 0
+	for i := 0; i < len(s); i += w {
+		// See comment in htmlEscaper.
+		r, w = utf8.DecodeRuneInString(s[i:])
 		var repl string
-		switch r {
-		case 0:
-			repl = `\0`
-		case '\t':
-			repl = `\9`
-		case '\n':
-			repl = `\a`
-		case '\f':
-			repl = `\c`
-		case '\r':
-			repl = `\d`
-		// Encode HTML specials as hex so the output can be embedded
-		// in HTML attributes without further encoding.
-		case '"':
-			repl = `\22`
-		case '&':
-			repl = `\26`
-		case '\'':
-			repl = `\27`
-		case '(':
-			repl = `\28`
-		case ')':
-			repl = `\29`
-		case '+':
-			repl = `\2b`
-		case '/':
-			repl = `\2f`
-		case ':':
-			repl = `\3a`
-		case ';':
-			repl = `\3b`
-		case '<':
-			repl = `\3c`
-		case '>':
-			repl = `\3e`
-		case '\\':
-			repl = `\\`
-		case '{':
-			repl = `\7b`
-		case '}':
-			repl = `\7d`
+		switch {
+		case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
+			repl = cssReplacementTable[r]
 		default:
 			continue
 		}
 		b.WriteString(s[written:i])
 		b.WriteString(repl)
-		written = i + utf8.RuneLen(r)
+		written = i + w
 		if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
 			b.WriteByte(' ')
 		}
@@ -218,6 +182,30 @@ func cssEscaper(args ...interface{}) string {
 	return b.String()
 }
 
+var cssReplacementTable = []string{
+	0:    `\0`,
+	'\t': `\9`,
+	'\n': `\a`,
+	'\f': `\c`,
+	'\r': `\d`,
+	// Encode HTML specials as hex so the output can be embedded
+	// in HTML attributes without further encoding.
+	'"':  `\22`,
+	'&':  `\26`,
+	'\'': `\27`,
+	'(':  `\28`,
+	')':  `\29`,
+	'+':  `\2b`,
+	'/':  `\2f`,
+	':':  `\3a`,
+	';':  `\3b`,
+	'<':  `\3c`,
+	'>':  `\3e`,
+	'\\': `\\`,
+	'{':  `\7b`,
+	'}':  `\7d`,
+}
+
 var expressionBytes = []byte("expression")
 var mozBindingBytes = []byte("mozbinding")
 
diff --git a/src/html/template/html.go b/src/html/template/html.go
@@ -138,21 +138,24 @@ var htmlNospaceNormReplacementTable = []string{
 // and when badRunes is true, certain bad runes are allowed through unescaped.
 func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
 	written, b := 0, new(bytes.Buffer)
-	for i, r := range s {
+	r, w := rune(0), 0
+	for i := 0; i < len(s); i += w {
+		// Cannot use 'for range s' because we need to preserve the width
+		// of the runes in the input. If we see a decoding error, the input
+		// width will not be utf8.Runelen(r) and we will overrun the buffer.
+		r, w = utf8.DecodeRuneInString(s[i:])
 		if int(r) < len(replacementTable) {
 			if repl := replacementTable[r]; len(repl) != 0 {
 				b.WriteString(s[written:i])
 				b.WriteString(repl)
-				// Valid as long as replacementTable doesn't
-				// include anything above 0x7f.
-				written = i + utf8.RuneLen(r)
+				written = i + w
 			}
 		} else if badRunes {
 			// No-op.
 			// IE does not allow these ranges in unquoted attrs.
 		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
 			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
-			written = i + utf8.RuneLen(r)
+			written = i + w
 		}
 	}
 	if written == 0 {
diff --git a/src/html/template/html_test.go b/src/html/template/html_test.go
@@ -19,7 +19,8 @@ func TestHTMLNospaceEscaper(t *testing.T) {
 		`PQRSTUVWXYZ[\]^_` +
 		"`abcdefghijklmno" +
 		"pqrstuvwxyz{|}~\x7f" +
-		"\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E")
+		"\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E" +
+		"erroneous\x960") // keep at the end
 
 	want := ("&#xfffd;\x01\x02\x03\x04\x05\x06\x07" +
 		"\x08&#9;&#10;&#11;&#12;&#13;\x0E\x0F" +
@@ -31,14 +32,16 @@ func TestHTMLNospaceEscaper(t *testing.T) {
 		`PQRSTUVWXYZ[\]^_` +
 		`&#96;abcdefghijklmno` +
 		`pqrstuvwxyz{|}~` + "\u007f" +
-		"\u00A0\u0100\u2028\u2029\ufeff&#xfdec;\U0001D11E")
+		"\u00A0\u0100\u2028\u2029\ufeff&#xfdec;\U0001D11E" +
+		"erroneous&#xfffd;0") // keep at the end
 
 	got := htmlNospaceEscaper(input)
 	if got != want {
 		t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
 	}
 
-	got, want = html.UnescapeString(got), strings.Replace(input, "\x00", "\ufffd", 1)
+	r := strings.NewReplacer("\x00", "\ufffd", "\x96", "\ufffd")
+	got, want = html.UnescapeString(got), r.Replace(input)
 	if want != got {
 		t.Errorf("decode: want\n\t%q\nbut got\n\t%q", want, got)
 	}
diff --git a/src/html/template/js.go b/src/html/template/js.go
@@ -246,8 +246,10 @@ func jsRegexpEscaper(args ...interface{}) string {
 // `\u2029`.
 func replace(s string, replacementTable []string) string {
 	var b bytes.Buffer
-	written := 0
-	for i, r := range s {
+	r, w, written := rune(0), 0, 0
+	for i := 0; i < len(s); i += w {
+		// See comment in htmlEscaper.
+		r, w = utf8.DecodeRuneInString(s[i:])
 		var repl string
 		switch {
 		case int(r) < len(replacementTable) && replacementTable[r] != "":
@@ -261,7 +263,7 @@ func replace(s string, replacementTable []string) string {
 		}
 		b.WriteString(s[written:i])
 		b.WriteString(repl)
-		written = i + utf8.RuneLen(r)
+		written = i + w
 	}
 	if written == 0 {
 		return s