
zstd: Increment buffer offset #263

Merged
merged 2 commits on May 27, 2020
4 changes: 4 additions & 0 deletions zstd/enc_dfast.go
@@ -671,4 +671,8 @@ encodeLoop:
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}

// We do not store history, so we must offset e.cur to avoid false matches for next user.
if e.cur < bufferReset {
e.cur += int32(len(src))
}
}
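
The new comment is the core of the fix: match-table entries record absolute positions (input offset plus e.cur), and EncodeNoHist keeps no history. If e.cur is left unchanged when the encoder goes back into the pool, entries from this input can be mistaken for matches in the next caller's unrelated input. A toy model of that bookkeeping (standalone Go; validEntry and maxMatchOff here are deliberate simplifications, not the library's exact checks) shows why bumping the base offset by len(src) invalidates the leftover entries:

package main

import "fmt"

const maxMatchOff = 1 << 16 // assumed window size for this sketch only

// validEntry is a simplified stand-in for the candidate checks in the encode
// loop: the entry must map back into the current input (t >= 0) and lie within
// the match window of the current position s. It is not the library's exact test.
func validEntry(cur, s, entryOffset int32) bool {
	t := entryOffset - cur // position the entry claims to point at
	return t >= 0 && s-t < maxMatchOff
}

func main() {
	var cur int32              // base offset, playing the role of e.cur
	prev := make([]byte, 4096) // the first caller's input

	// While encoding prev, position 100 is recorded as an absolute offset.
	entry := cur + 100

	// Without advancing cur, the stale entry still looks usable for the next,
	// completely unrelated input and could be taken as a (false) match.
	fmt.Println("stale entry accepted:", validEntry(cur, 100, entry)) // true

	// The fix: advance cur by len(src) at the end of EncodeNoHist, so every
	// entry recorded for prev now maps to t < 0 and is rejected.
	cur += int32(len(prev))
	fmt.Println("stale entry accepted after bump:", validEntry(cur, 100, entry)) // false
}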
15 changes: 13 additions & 2 deletions zstd/enc_fast.go
@@ -383,6 +383,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
panic("src too big")
}
}

// Protect against e.cur wraparound.
if e.cur >= bufferReset {
for i := range e.table[:] {
@@ -516,6 +517,9 @@ encodeLoop:
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
}
break
}

@@ -548,6 +552,9 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}

if debugAsserts && t < 0 {
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
@@ -647,6 +654,10 @@ encodeLoop:
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
if e.cur < bufferReset {
e.cur += int32(len(src))
}
}

func (e *fastBase) addBlock(src []byte) int32 {
@@ -714,7 +725,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
}

// Reset the encoding table.
func (e *fastBase) Reset() {
func (e *fastBase) Reset(singleBlock bool) {
if e.blk == nil {
e.blk = &blockEnc{}
e.blk.init()
@@ -727,7 +738,7 @@
} else {
e.crc.Reset()
}
if cap(e.hist) < int(e.maxMatchOff*2) {
if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) {
l := e.maxMatchOff * 2
// Make it at least 1MB.
if l < 1<<20 {
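
The new singleBlock parameter on Reset is what keeps the pooled encoders cheap: an encoder that only serves single-block EncodeAll calls never needs the history buffer, so it is only allocated for streaming use. A simplified sketch of that lazy-allocation pattern (standalone Go; fastBaseSketch and the sizes are illustrative, mirroring the fields shown in the diff):

package main

import "fmt"

// fastBaseSketch is an illustrative stand-in for the encoder state touched by
// Reset(singleBlock bool) in this PR; it is not the library's actual type.
type fastBaseSketch struct {
	maxMatchOff int32
	hist        []byte
}

// Reset mirrors the shape of the change: the history buffer is only grown when
// the encoder will be used for multi-block (streaming) encodes.
func (e *fastBaseSketch) Reset(singleBlock bool) {
	if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) {
		l := e.maxMatchOff * 2
		// Make it at least 1MB, as in the diff.
		if l < 1<<20 {
			l = 1 << 20
		}
		e.hist = make([]byte, 0, l)
	}
}

func main() {
	e := &fastBaseSketch{maxMatchOff: 1 << 16}

	e.Reset(true) // single-block use: no history buffer is allocated
	fmt.Println("hist cap after Reset(true): ", cap(e.hist))

	e.Reset(false) // streaming use: history buffer allocated when needed
	fmt.Println("hist cap after Reset(false):", cap(e.hist))
}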
21 changes: 13 additions & 8 deletions zstd/encoder.go
@@ -35,7 +35,7 @@ type encoder interface {
AppendCRC([]byte) []byte
WindowSize(size int) int32
UseBlock(*blockEnc)
Reset()
Reset(singleBlock bool)
}

type encoderState struct {
@@ -82,7 +82,10 @@ func (e *Encoder) initialize() {
}
e.encoders = make(chan encoder, e.o.concurrent)
for i := 0; i < e.o.concurrent; i++ {
e.encoders <- e.o.encoder()
enc := e.o.encoder()
// If not single block, history will be allocated on first use.
enc.Reset(true)
e.encoders <- enc
}
}

@@ -112,7 +115,7 @@ func (e *Encoder) Reset(w io.Writer) {
s.filling = s.filling[:0]
s.current = s.current[:0]
s.previous = s.previous[:0]
s.encoder.Reset()
s.encoder.Reset(false)
s.headerWritten = false
s.eofWritten = false
s.fullFrameWritten = false
@@ -445,11 +448,10 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
enc := <-e.encoders
defer func() {
// Release encoder reference to last block.
enc.Reset()
// If a non-single block is needed the encoder will reset again.
enc.Reset(true)
e.encoders <- enc
}()
enc.Reset()
blk := enc.Block()
// Use single segments when above minimum window and below 1MB.
single := len(src) < 1<<20 && len(src) > MinWindowSize
if e.o.single != nil {
@@ -472,12 +474,13 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
panic(err)
}

if len(src) <= e.o.blockSize && len(src) <= maxBlockSize {
// If we can do everything in one block, prefer that.
if len(src) <= maxCompressedBlockSize {
// Slightly faster with no history and everything in one block.
if e.o.crc {
_, _ = enc.CRC().Write(src)
}
blk.reset(nil)
blk := enc.Block()
blk.last = true
enc.EncodeNoHist(blk, src)

@@ -504,6 +507,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
blk.output = oldout
} else {
enc.Reset(false)
blk := enc.Block()
for len(src) > 0 {
todo := src
if len(todo) > e.o.blockSize {
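
For context, the code path this PR changes is the one exercised when a single Encoder is reused for many independent EncodeAll calls: small inputs stay on the no-history single-block path, and the internal encoder is returned to the pool between calls. A minimal usage sketch against the public API (standalone program, not part of this diff):

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// One Encoder reused for many independent EncodeAll calls. Small inputs
	// take the single-block, no-history path touched by this PR, and the
	// internal encoder is returned to the pool after every call.
	enc, err := zstd.NewWriter(nil)
	if err != nil {
		panic(err)
	}
	defer enc.Close()

	inputs := [][]byte{
		[]byte("first independent payload"),
		[]byte("second, unrelated payload"),
	}
	for _, in := range inputs {
		dst := enc.EncodeAll(in, nil)
		fmt.Printf("compressed %d -> %d bytes\n", len(in), len(dst))
	}
}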