Skip to content

Commit 244c98f

Browse files
josselin-c authored and ianlancetaylor committed
encoding/base64: optimize DecodeString
Optimize base64 decoding speed by adding 32-bit and 64-bit specialized
methods that don't perform any error checking and fall back to the more
complex decodeQuantum method when a non-base64 character is present.

On a 64-bit CPU:

name                  old time/op    new time/op    delta
DecodeString/2-4      70.0ns ± 6%    69.2ns ± 0%       ~     (p=0.169 n=5+8)
DecodeString/4-4      91.3ns ± 2%    80.4ns ± 0%   -11.89%  (p=0.001 n=5+10)
DecodeString/8-4       126ns ± 5%     106ns ± 0%   -16.14%  (p=0.000 n=5+7)
DecodeString/64-4      652ns ±21%     361ns ± 0%   -44.57%  (p=0.000 n=5+7)
DecodeString/8192-4   61.0µs ±13%    31.5µs ± 1%   -48.38%  (p=0.001 n=5+9)

name                  old speed      new speed      delta
DecodeString/2-4      57.2MB/s ± 6%  57.7MB/s ± 2%     ~     (p=0.419 n=5+9)
DecodeString/4-4      87.7MB/s ± 2%  99.5MB/s ± 0%  +13.45%  (p=0.001 n=5+10)
DecodeString/8-4      94.8MB/s ± 5%  112.6MB/s ± 1% +18.82%  (p=0.001 n=5+9)
DecodeString/64-4      136MB/s ±19%   243MB/s ± 0%  +78.17%  (p=0.003 n=5+7)
DecodeString/8192-4    180MB/s ±11%   347MB/s ± 1%  +92.94%  (p=0.001 n=5+9)

Improves #19636

Change-Id: Ic10a454851093a7e1d46ca0c140deed73535d990
Reviewed-on: https://go-review.googlesource.com/38632
Run-TryBot: Ian Lance Taylor <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 320b0cd commit 244c98f

File tree

4 files changed

+216
-110
lines changed

4 files changed

+216
-110
lines changed

src/cmd/dist/deps.go

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/encoding/base64/base64.go

Lines changed: 211 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package base64
77

88
import (
9+
"encoding/binary"
910
"io"
1011
"strconv"
1112
)
@@ -269,121 +270,110 @@ func (e CorruptInputError) Error() string {
269270
return "illegal base64 data at input byte " + strconv.FormatInt(int64(e), 10)
270271
}
271272

272-
// decode is like Decode but returns an additional 'end' value, which
273-
// indicates if end-of-message padding or a partial quantum was encountered
274-
// and thus any additional data is an error.
275-
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
276-
si := 0
277-
278-
for si < len(src) && !end {
279-
// Decode quantum using the base64 alphabet
280-
var dbuf [4]byte
281-
dinc, dlen := 3, 4
282-
283-
for j := 0; j < len(dbuf); j++ {
284-
if len(src) == si {
285-
switch {
286-
case j == 0:
287-
return n, false, nil
288-
case j == 1, enc.padChar != NoPadding:
289-
return n, false, CorruptInputError(si - j)
290-
}
291-
dinc, dlen, end = j-1, j, true
292-
break
273+
// decodeQuantum decodes up to 4 base64 bytes. It takes as parameters
274+
// the destination buffer dst, the source buffer src and an index in the
275+
// source buffer si.
276+
// It returns the number of bytes read from src, the number of bytes written
277+
// to dst, and an error, if any.
278+
func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err error) {
279+
// Decode quantum using the base64 alphabet
280+
var dbuf [4]byte
281+
dinc, dlen := 3, 4
282+
283+
for j := 0; j < len(dbuf); j++ {
284+
if len(src) == si {
285+
switch {
286+
case j == 0:
287+
return si, 0, nil
288+
case j == 1, enc.padChar != NoPadding:
289+
return si, 0, CorruptInputError(si - j)
293290
}
294-
in := src[si]
291+
dinc, dlen = j-1, j
292+
break
293+
}
294+
in := src[si]
295+
si++
295296

296-
si++
297+
out := enc.decodeMap[in]
298+
if out != 0xff {
299+
dbuf[j] = out
300+
continue
301+
}
297302

298-
out := enc.decodeMap[in]
299-
if out != 0xFF {
300-
dbuf[j] = out
301-
continue
302-
}
303+
if in == '\n' || in == '\r' {
304+
j--
305+
continue
306+
}
303307

304-
if in == '\n' || in == '\r' {
305-
j--
306-
continue
307-
}
308-
if rune(in) == enc.padChar {
309-
// We've reached the end and there's padding
310-
switch j {
311-
case 0, 1:
312-
// incorrect padding
313-
return n, false, CorruptInputError(si - 1)
314-
case 2:
315-
// "==" is expected, the first "=" is already consumed.
316-
// skip over newlines
317-
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
318-
si++
319-
}
320-
if si == len(src) {
321-
// not enough padding
322-
return n, false, CorruptInputError(len(src))
323-
}
324-
if rune(src[si]) != enc.padChar {
325-
// incorrect padding
326-
return n, false, CorruptInputError(si - 1)
327-
}
328-
329-
si++
330-
}
331-
// skip over newlines
332-
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
333-
si++
334-
}
335-
if si < len(src) {
336-
// trailing garbage
337-
err = CorruptInputError(si)
338-
}
339-
dinc, dlen, end = 3, j, true
340-
break
341-
}
342-
return n, false, CorruptInputError(si - 1)
308+
if rune(in) != enc.padChar {
309+
return si, 0, CorruptInputError(si - 1)
343310
}
344311

345-
// Convert 4x 6bit source bytes into 3 bytes
346-
val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
347-
dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
348-
switch dlen {
349-
case 4:
350-
dst[2] = dbuf[2]
351-
dbuf[2] = 0
352-
fallthrough
353-
case 3:
354-
dst[1] = dbuf[1]
355-
if enc.strict && dbuf[2] != 0 {
356-
return n, end, CorruptInputError(si - 1)
357-
}
358-
dbuf[1] = 0
359-
fallthrough
312+
// We've reached the end and there's padding
313+
switch j {
314+
case 0, 1:
315+
// incorrect padding
316+
return si, 0, CorruptInputError(si - 1)
360317
case 2:
361-
dst[0] = dbuf[0]
362-
if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
363-
return n, end, CorruptInputError(si - 2)
318+
// "==" is expected, the first "=" is already consumed.
319+
// skip over newlines
320+
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
321+
si++
322+
}
323+
if si == len(src) {
324+
// not enough padding
325+
return si, 0, CorruptInputError(len(src))
364326
}
327+
if rune(src[si]) != enc.padChar {
328+
// incorrect padding
329+
return si, 0, CorruptInputError(si - 1)
330+
}
331+
332+
si++
333+
}
334+
335+
// skip over newlines
336+
for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
337+
si++
338+
}
339+
if si < len(src) {
340+
// trailing garbage
341+
err = CorruptInputError(si)
365342
}
366-
dst = dst[dinc:]
367-
n += dlen - 1
343+
dinc, dlen = 3, j
344+
break
368345
}
369346

370-
return n, end, err
371-
}
347+
// Convert 4x 6bit source bytes into 3 bytes
348+
val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
349+
dbuf[2], dbuf[1], dbuf[0] = byte(val>>0), byte(val>>8), byte(val>>16)
350+
switch dlen {
351+
case 4:
352+
dst[2] = dbuf[2]
353+
dbuf[2] = 0
354+
fallthrough
355+
case 3:
356+
dst[1] = dbuf[1]
357+
if enc.strict && dbuf[2] != 0 {
358+
return si, 0, CorruptInputError(si - 1)
359+
}
360+
dbuf[1] = 0
361+
fallthrough
362+
case 2:
363+
dst[0] = dbuf[0]
364+
if enc.strict && (dbuf[1] != 0 || dbuf[2] != 0) {
365+
return si, 0, CorruptInputError(si - 2)
366+
}
367+
}
368+
dst = dst[dinc:]
372369

373-
// Decode decodes src using the encoding enc. It writes at most
374-
// DecodedLen(len(src)) bytes to dst and returns the number of bytes
375-
// written. If src contains invalid base64 data, it will return the
376-
// number of bytes successfully written and CorruptInputError.
377-
// New line characters (\r and \n) are ignored.
378-
func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
379-
n, _, err = enc.decode(dst, src)
380-
return
370+
return si, dlen - 1, err
381371
}
382372

383373
// DecodeString returns the bytes represented by the base64 string s.
384374
func (enc *Encoding) DecodeString(s string) ([]byte, error) {
385375
dbuf := make([]byte, enc.DecodedLen(len(s)))
386-
n, _, err := enc.decode(dbuf, []byte(s))
376+
n, err := enc.Decode(dbuf, []byte(s))
387377
return dbuf[:n], err
388378
}
389379

@@ -392,7 +382,6 @@ type decoder struct {
392382
readErr error // error from r.Read
393383
enc *Encoding
394384
r io.Reader
395-
end bool // saw end of message
396385
buf [1024]byte // leftover input
397386
nbuf int
398387
out []byte // leftover decoded output
@@ -430,9 +419,8 @@ func (d *decoder) Read(p []byte) (n int, err error) {
430419
if d.enc.padChar == NoPadding && d.nbuf > 0 {
431420
// Decode final fragment, without padding.
432421
var nw int
433-
nw, _, d.err = d.enc.decode(d.outbuf[:], d.buf[:d.nbuf])
422+
nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:d.nbuf])
434423
d.nbuf = 0
435-
d.end = true
436424
d.out = d.outbuf[:nw]
437425
n = copy(p, d.out)
438426
d.out = d.out[n:]
@@ -454,18 +442,138 @@ func (d *decoder) Read(p []byte) (n int, err error) {
454442
nr := d.nbuf / 4 * 4
455443
nw := d.nbuf / 4 * 3
456444
if nw > len(p) {
457-
nw, d.end, d.err = d.enc.decode(d.outbuf[:], d.buf[:nr])
445+
nw, d.err = d.enc.Decode(d.outbuf[:], d.buf[:nr])
458446
d.out = d.outbuf[:nw]
459447
n = copy(p, d.out)
460448
d.out = d.out[n:]
461449
} else {
462-
n, d.end, d.err = d.enc.decode(p, d.buf[:nr])
450+
n, d.err = d.enc.Decode(p, d.buf[:nr])
463451
}
464452
d.nbuf -= nr
465453
copy(d.buf[:d.nbuf], d.buf[nr:])
466454
return n, d.err
467455
}
468456

457+
// Decode decodes src using the encoding enc. It writes at most
458+
// DecodedLen(len(src)) bytes to dst and returns the number of bytes
459+
// written. If src contains invalid base64 data, it will return the
460+
// number of bytes successfully written and CorruptInputError.
461+
// New line characters (\r and \n) are ignored.
462+
func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
463+
if len(src) == 0 {
464+
return 0, nil
465+
}
466+
467+
si := 0
468+
ilen := len(src)
469+
olen := len(dst)
470+
for strconv.IntSize >= 64 && ilen-si >= 8 && olen-n >= 8 {
471+
if ok := enc.decode64(dst[n:], src[si:]); ok {
472+
n += 6
473+
si += 8
474+
} else {
475+
var ninc int
476+
si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
477+
n += ninc
478+
if err != nil {
479+
return n, err
480+
}
481+
}
482+
}
483+
484+
for ilen-si >= 4 && olen-n >= 4 {
485+
if ok := enc.decode32(dst[n:], src[si:]); ok {
486+
n += 3
487+
si += 4
488+
} else {
489+
var ninc int
490+
si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
491+
n += ninc
492+
if err != nil {
493+
return n, err
494+
}
495+
}
496+
}
497+
498+
for si < len(src) {
499+
var ninc int
500+
si, ninc, err = enc.decodeQuantum(dst[n:], src, si)
501+
n += ninc
502+
if err != nil {
503+
return n, err
504+
}
505+
}
506+
return n, err
507+
}
508+
509+
// decode32 tries to decode 4 base64 char into 3 bytes.
510+
// len(dst) and len(src) must both be >= 4.
511+
// Returns true if decode succeeded.
512+
func (enc *Encoding) decode32(dst, src []byte) bool {
513+
var dn, n uint32
514+
if n = uint32(enc.decodeMap[src[0]]); n == 0xff {
515+
return false
516+
}
517+
dn |= n << 26
518+
if n = uint32(enc.decodeMap[src[1]]); n == 0xff {
519+
return false
520+
}
521+
dn |= n << 20
522+
if n = uint32(enc.decodeMap[src[2]]); n == 0xff {
523+
return false
524+
}
525+
dn |= n << 14
526+
if n = uint32(enc.decodeMap[src[3]]); n == 0xff {
527+
return false
528+
}
529+
dn |= n << 8
530+
531+
binary.BigEndian.PutUint32(dst, dn)
532+
return true
533+
}
534+
535+
// decode64 tries to decode 8 base64 char into 6 bytes.
536+
// len(dst) and len(src) must both be >= 8.
537+
// Returns true if decode succeeded.
538+
func (enc *Encoding) decode64(dst, src []byte) bool {
539+
var dn, n uint64
540+
if n = uint64(enc.decodeMap[src[0]]); n == 0xff {
541+
return false
542+
}
543+
dn |= n << 58
544+
if n = uint64(enc.decodeMap[src[1]]); n == 0xff {
545+
return false
546+
}
547+
dn |= n << 52
548+
if n = uint64(enc.decodeMap[src[2]]); n == 0xff {
549+
return false
550+
}
551+
dn |= n << 46
552+
if n = uint64(enc.decodeMap[src[3]]); n == 0xff {
553+
return false
554+
}
555+
dn |= n << 40
556+
if n = uint64(enc.decodeMap[src[4]]); n == 0xff {
557+
return false
558+
}
559+
dn |= n << 34
560+
if n = uint64(enc.decodeMap[src[5]]); n == 0xff {
561+
return false
562+
}
563+
dn |= n << 28
564+
if n = uint64(enc.decodeMap[src[6]]); n == 0xff {
565+
return false
566+
}
567+
dn |= n << 22
568+
if n = uint64(enc.decodeMap[src[7]]); n == 0xff {
569+
return false
570+
}
571+
dn |= n << 16
572+
573+
binary.BigEndian.PutUint64(dst, dn)
574+
return true
575+
}
576+
469577
type newlineFilteringReader struct {
470578
wrapped io.Reader
471579
}

0 commit comments

Comments
 (0)