Skip to content

Commit 6e9891a

Browse files
committed
codec: clean up, improved symbol handling and numeric overflow support
Binc Handling and Symbols codec: remove AsSymbols from EncodeOption, and allow a modified version for binc only Currently, encDriver defines an EncodeSymbol function, that is only called when writing struct fields or map keys of string type. If we take out the differentiation, this can easily be handled by a specific handle, as it can track what the current containerState is. Instead, we remove all vestiges of Symbol support from encode.go and have it be something unique to binc. This also reduces the work that is done for all handles. codec: remove overflow checking from Handles - manage it at the framework This means the following: - change decDriver DecodeUint and DecodeInt to just DecodeUint64 and DecodeInt64 - add overflow check logic into checkOverflow type - use the overflow check logic in one-line statements across codebase codec: fast-path: hoist conditional check out of loop Misc cleanup - clean up panic calls - clean up some comments - clean up error messages - optimize tracking TypeInfos by reusing same array if possible
1 parent e60f01b commit 6e9891a

20 files changed

+5812
-4189
lines changed

codec/0doc.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -225,11 +225,11 @@ package codec
225225
// - In Go 1.10, when mid-stack inlining is enabled,
226226
// we should use committed functions for writeXXX and readXXX calls.
227227
// This involves uncommenting the methods for decReaderSwitch and encWriterSwitch
228-
// and using those (decReaderSwitch and encWriterSwitch in all handles
228+
// and using those (decReaderSwitch and encWriterSwitch) in all handles
229229
// instead of encWriter and decReader.
230-
// - removing conditionals used to avoid calling no-op functions via interface calls.
231-
// esep, etc.
232-
// It *should* make the code cleaner, and maybe more performant,
233-
// as conditional branches are expensive.
234-
// However, per https://groups.google.com/forum/#!topic/golang-nuts/DNELyNnTzFA ,
235-
// there is no optimization if calling an empty function via an interface.
230+
// The benefit is that, for the (En|De)coder over []byte, the encWriter/decReader
231+
// will be inlined, giving a performance bump for that typical case.
232+
// However, it will only be inlined if mid-stack inlining is enabled,
233+
// as we call panic to raise errors, and panic currently prevents inlining.
234+
// - Clean up comments in the codebase
235+
// Remove all unnecessary comments, so code is clean.

codec/binc.go

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,15 @@ const (
5757

5858
type bincEncDriver struct {
5959
e *Encoder
60+
h *BincHandle
6061
w encWriter
6162
m map[string]uint16 // symbols
6263
b [scratchByteArrayLen]byte
6364
s uint16 // symbols sequencer
64-
// encNoSeparator
65-
encDriverNoopContainerWriter
65+
// c containerState
66+
encDriverTrackContainerWriter
6667
noBuiltInTypes
68+
// encNoSeparator
6769
}
6870

6971
// func (e *bincEncDriver) IsBuiltinType(rt uintptr) bool {
@@ -201,13 +203,19 @@ func (e *bincEncDriver) encodeExtPreamble(xtag byte, length int) {
201203

202204
func (e *bincEncDriver) WriteArrayStart(length int) {
203205
e.encLen(bincVdArray<<4, uint64(length))
206+
e.c = containerArrayStart
204207
}
205208

206209
func (e *bincEncDriver) WriteMapStart(length int) {
207210
e.encLen(bincVdMap<<4, uint64(length))
211+
e.c = containerMapStart
208212
}
209213

210214
func (e *bincEncDriver) EncodeString(c charEncoding, v string) {
215+
if e.c == containerMapKey && c == cUTF8 && (e.h.AsSymbols == 0 || e.h.AsSymbols == 1) {
216+
e.EncodeSymbol(v)
217+
return
218+
}
211219
l := uint64(len(v))
212220
e.encBytesLen(c, l)
213221
if l > 0 {
@@ -522,34 +530,22 @@ func (d *bincDecDriver) decCheckInteger() (ui uint64, neg bool) {
522530
return
523531
}
524532

525-
func (d *bincDecDriver) DecodeInt(bitsize uint8) (i int64) {
533+
func (d *bincDecDriver) DecodeInt64() (i int64) {
526534
ui, neg := d.decCheckInteger()
527-
i, overflow := chkOvf.SignedInt(ui)
528-
if overflow {
529-
d.d.errorf("simple: overflow converting %v to signed integer", ui)
530-
return
531-
}
535+
i = chkOvf.SignedIntV(ui)
532536
if neg {
533537
i = -i
534538
}
535-
if chkOvf.Int(i, bitsize) {
536-
d.d.errorf("binc: overflow integer: %v for num bits: %v", i, bitsize)
537-
return
538-
}
539539
d.bdRead = false
540540
return
541541
}
542542

543-
func (d *bincDecDriver) DecodeUint(bitsize uint8) (ui uint64) {
543+
func (d *bincDecDriver) DecodeUint64() (ui uint64) {
544544
ui, neg := d.decCheckInteger()
545545
if neg {
546546
d.d.errorf("Assigning negative signed value to unsigned type")
547547
return
548548
}
549-
if chkOvf.Uint(ui, bitsize) {
550-
d.d.errorf("binc: overflow integer: %v", ui)
551-
return
552-
}
553549
d.bdRead = false
554550
return
555551
}
@@ -576,7 +572,7 @@ func (d *bincDecDriver) DecodeFloat64() (f float64) {
576572
} else if vd == bincVdFloat {
577573
f = d.decFloat()
578574
} else {
579-
f = float64(d.DecodeInt(64))
575+
f = float64(d.DecodeInt64())
580576
}
581577
d.bdRead = false
582578
return
@@ -932,6 +928,26 @@ type BincHandle struct {
932928
BasicHandle
933929
binaryEncodingType
934930
noElemSeparators
931+
932+
// AsSymbols defines what should be encoded as symbols.
933+
//
934+
// Encoding as symbols can reduce the encoded size significantly.
935+
//
936+
// However, during encoding, each string to be encoded as a symbol must
937+
// be checked to see if it has been seen before. Consequently, encoding time
938+
// will increase if using symbols, because string comparisons have a clear cost.
939+
//
940+
// Values:
941+
// - 0: default: library uses best judgement
942+
// - 1: use symbols
943+
// - 2: do not use symbols
944+
AsSymbols byte
945+
946+
// AsSymbols: may later on introduce more options ...
947+
// - m: map keys
948+
// - s: struct fields
949+
// - n: none
950+
// - a: all: same as m, s, ...
935951
}
936952

937953
// Name returns the name of the handle: binc
@@ -943,7 +959,7 @@ func (h *BincHandle) SetBytesExt(rt reflect.Type, tag uint64, ext BytesExt) (err
943959
}
944960

945961
func (h *BincHandle) newEncDriver(e *Encoder) encDriver {
946-
return &bincEncDriver{e: e, w: e.w}
962+
return &bincEncDriver{e: e, h: h, w: e.w}
947963
}
948964

949965
func (h *BincHandle) newDecDriver(d *Decoder) decDriver {
@@ -959,6 +975,7 @@ func (h *BincHandle) newDecDriver(d *Decoder) decDriver {
959975
func (e *bincEncDriver) reset() {
960976
e.w = e.e.w
961977
e.s = 0
978+
e.c = 0
962979
e.m = nil
963980
}
964981

codec/cbor.go

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,9 @@ func (e *cborEncDriver) WriteArrayEnd() {
193193
}
194194
}
195195

196-
func (e *cborEncDriver) EncodeSymbol(v string) {
197-
e.encStringBytesS(cborBaseString, v)
198-
}
196+
// func (e *cborEncDriver) EncodeSymbol(v string) {
197+
// e.encStringBytesS(cborBaseString, v)
198+
// }
199199

200200
func (e *cborEncDriver) EncodeString(c charEncoding, v string) {
201201
e.encStringBytesS(cborBaseString, v)
@@ -350,41 +350,25 @@ func (d *cborDecDriver) decCheckInteger() (neg bool) {
350350
return
351351
}
352352

353-
func (d *cborDecDriver) DecodeInt(bitsize uint8) (i int64) {
353+
func (d *cborDecDriver) DecodeInt64() (i int64) {
354354
neg := d.decCheckInteger()
355355
ui := d.decUint()
356356
// check if this number can be converted to an int without overflow
357-
var overflow bool
358357
if neg {
359-
if i, overflow = chkOvf.SignedInt(ui + 1); overflow {
360-
d.d.errorf("cbor: overflow converting %v to signed integer", ui+1)
361-
return
362-
}
363-
i = -i
358+
i = -(chkOvf.SignedIntV(ui + 1))
364359
} else {
365-
if i, overflow = chkOvf.SignedInt(ui); overflow {
366-
d.d.errorf("cbor: overflow converting %v to signed integer", ui)
367-
return
368-
}
369-
}
370-
if chkOvf.Int(i, bitsize) {
371-
d.d.errorf("cbor: overflow integer: %v", i)
372-
return
360+
i = chkOvf.SignedIntV(ui)
373361
}
374362
d.bdRead = false
375363
return
376364
}
377365

378-
func (d *cborDecDriver) DecodeUint(bitsize uint8) (ui uint64) {
366+
func (d *cborDecDriver) DecodeUint64() (ui uint64) {
379367
if d.decCheckInteger() {
380368
d.d.errorf("Assigning negative signed value to unsigned type")
381369
return
382370
}
383371
ui = d.decUint()
384-
if chkOvf.Uint(ui, bitsize) {
385-
d.d.errorf("cbor: overflow integer: %v", ui)
386-
return
387-
}
388372
d.bdRead = false
389373
return
390374
}
@@ -400,7 +384,7 @@ func (d *cborDecDriver) DecodeFloat64() (f float64) {
400384
} else if bd == cborBdFloat64 {
401385
f = math.Float64frombits(bigen.Uint64(d.r.readx(8)))
402386
} else if bd >= cborBaseUint && bd < cborBaseBytes {
403-
f = float64(d.DecodeInt(64))
387+
f = float64(d.DecodeInt64())
404388
} else {
405389
d.d.errorf("Float only valid from float16/32/64: Invalid descriptor: %v", bd)
406390
return
@@ -458,7 +442,7 @@ func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
458442
break
459443
}
460444
if major := d.bd >> 5; major != cborMajorBytes && major != cborMajorText {
461-
d.d.errorf("cbor: expect bytes or string major type in indefinite string/bytes; got: %v, byte: %v", major, d.bd)
445+
d.d.errorf("expect bytes or string major type in indefinite string/bytes; got: %v, byte: %v", major, d.bd)
462446
return nil
463447
}
464448
n := d.decLen()
@@ -553,12 +537,12 @@ func (d *cborDecDriver) decodeTime(xtag uint64) (t time.Time) {
553537
f1, f2 := math.Modf(d.DecodeFloat64())
554538
t = time.Unix(int64(f1), int64(f2*1e9))
555539
case d.bd >= cborBaseUint && d.bd < cborBaseNegInt, d.bd >= cborBaseNegInt && d.bd < cborBaseBytes:
556-
t = time.Unix(d.DecodeInt(64), 0)
540+
t = time.Unix(d.DecodeInt64(), 0)
557541
default:
558-
d.d.errorf("cbor: time.Time can only be decoded from a number (or RFC3339 string)")
542+
d.d.errorf("time.Time can only be decoded from a number (or RFC3339 string)")
559543
}
560544
default:
561-
d.d.errorf("cbor: invalid tag for time.Time - expecting 0 or 1, got 0x%x", xtag)
545+
d.d.errorf("invalid tag for time.Time - expecting 0 or 1, got 0x%x", xtag)
562546
}
563547
t = t.UTC().Round(time.Microsecond)
564548
return
@@ -624,14 +608,14 @@ func (d *cborDecDriver) DecodeNaked() {
624608
case d.bd >= cborBaseUint && d.bd < cborBaseNegInt:
625609
if d.h.SignedInteger {
626610
n.v = valueTypeInt
627-
n.i = d.DecodeInt(64)
611+
n.i = d.DecodeInt64()
628612
} else {
629613
n.v = valueTypeUint
630-
n.u = d.DecodeUint(64)
614+
n.u = d.DecodeUint64()
631615
}
632616
case d.bd >= cborBaseNegInt && d.bd < cborBaseBytes:
633617
n.v = valueTypeInt
634-
n.i = d.DecodeInt(64)
618+
n.i = d.DecodeInt64()
635619
case d.bd >= cborBaseBytes && d.bd < cborBaseString:
636620
n.v = valueTypeBytes
637621
n.l = d.DecodeBytes(nil, false)

codec/codec_test.go

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,8 @@ func testInit() {
346346
// pre-fill them first
347347
bh.EncodeOptions = testEncodeOptions
348348
bh.DecodeOptions = testDecodeOptions
349-
// bh.InterfaceReset = true // TODO: remove
350-
// bh.PreferArrayOverSlice = true // TODO: remove
349+
// bh.InterfaceReset = true
350+
// bh.PreferArrayOverSlice = true
351351
// modify from flag'ish things
352352
bh.InternString = testInternStr
353353
bh.Canonical = testCanonical
@@ -1878,8 +1878,6 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
18781878
testUnmarshalErr(m2, bs, h, t, "-")
18791879
testDeepEqualErr(m, m2, t, "-")
18801880

1881-
// TODO: skip rest if 32-bit
1882-
18831881
// do same tests for large strings (encoded as symbols or not)
18841882
// skip if 32-bit or not using unsafe mode
18851883
if safeMode || (32<<(^uint(0)>>63)) < 64 {
@@ -1891,10 +1889,11 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
18911889
// to do this, we create a simple one-field struct,
18921890
// use flags to switch from symbols to non-symbols
18931891

1894-
bh := h.getBasicHandle()
1895-
oldAsSymbols := bh.AsSymbols
1896-
defer func() { bh.AsSymbols = oldAsSymbols }()
1897-
1892+
hbinc, okbinc := h.(*BincHandle)
1893+
if okbinc {
1894+
oldAsSymbols := hbinc.AsSymbols
1895+
defer func() { hbinc.AsSymbols = oldAsSymbols }()
1896+
}
18981897
var out []byte = make([]byte, 0, math.MaxUint16*3/2)
18991898
var in []byte = make([]byte, math.MaxUint16*3/2)
19001899
for i := range in {
@@ -1915,7 +1914,9 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
19151914
// fmt.Printf("testcontainerlen: large string: i: %v, |%s|\n", i, s1)
19161915
m1[s1] = true
19171916

1918-
bh.AsSymbols = AsSymbolNone
1917+
if okbinc {
1918+
hbinc.AsSymbols = 2
1919+
}
19191920
out = out[:0]
19201921
e.ResetBytes(&out)
19211922
e.MustEncode(m1)
@@ -1924,15 +1925,17 @@ func doTestLargeContainerLen(t *testing.T, h Handle) {
19241925
testUnmarshalErr(m2, out, h, t, "no-symbols")
19251926
testDeepEqualErr(m1, m2, t, "no-symbols")
19261927

1927-
// now, do as symbols
1928-
bh.AsSymbols = AsSymbolAll
1929-
out = out[:0]
1930-
e.ResetBytes(&out)
1931-
e.MustEncode(m1)
1932-
// bs, _ = testMarshalErr(m1, h, t, "-")
1933-
m2 = make(map[string]bool, 1)
1934-
testUnmarshalErr(m2, out, h, t, "symbols")
1935-
testDeepEqualErr(m1, m2, t, "symbols")
1928+
if okbinc {
1929+
// now, do as symbols
1930+
hbinc.AsSymbols = 1
1931+
out = out[:0]
1932+
e.ResetBytes(&out)
1933+
e.MustEncode(m1)
1934+
// bs, _ = testMarshalErr(m1, h, t, "-")
1935+
m2 = make(map[string]bool, 1)
1936+
testUnmarshalErr(m2, out, h, t, "symbols")
1937+
testDeepEqualErr(m1, m2, t, "symbols")
1938+
}
19361939
}
19371940

19381941
}

0 commit comments

Comments
 (0)