Skip to content

Commit 9c47e78

Browse files
committed
zstd: Fix back-referenced offset
Since we expand matches backwards early, we may be in a situation where best.s+2 has already been indexed. This will result in picking up a zero or negative offset, which leads to corrupted data. Skip the end-of-match check if best.s is less than or equal to s-2. Regression from #784 (not released).
1 parent 382ea74 commit 9c47e78

File tree

2 files changed

+29
-22
lines changed

2 files changed

+29
-22
lines changed

zstd/enc_best.go

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ type match struct {
3434
est int32
3535
}
3636

37-
const highScore = 25000
37+
const highScore = maxMatchLen * 8
3838

3939
// estBits will estimate output bits from predefined tables.
4040
func (m *match) estBits(bitsPerByte int32) {
@@ -201,6 +201,9 @@ encodeLoop:
201201
return
202202
}
203203
if debugAsserts {
204+
if offset <= 0 {
205+
panic(offset)
206+
}
204207
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
205208
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
206209
}
@@ -291,37 +294,41 @@ encodeLoop:
291294
continue
292295
}
293296

294-
s := s + 1
295297
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
296-
cv = load6432(src, s)
297-
cv2 := load6432(src, s+1)
298+
cv = load6432(src, s+1)
299+
cv2 := load6432(src, s+2)
298300
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
299301
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
300302

301303
// Short at s+1
302-
improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
304+
improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
303305
// Long at s+1, s+2
304-
improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
305-
improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
306-
improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
307-
improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
306+
improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
307+
improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
308+
improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
309+
improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
308310
if false {
309311
// Short at s+3.
310312
// Too often worse...
311-
improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
313+
improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
312314
}
313-
// See if we can find a better match by checking where the current best ends.
314-
// Use that offset to see if we can find a better full match.
315-
if sAt := best.s + best.length; sAt < sLimit {
316-
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
317-
candidateEnd := e.longTable[nextHashL]
318-
// Start check at a fixed offset to allow for a few mismatches.
319-
// For this compression level 2 yields the best results.
320-
const skipBeginning = 2
321-
if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
322-
improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
323-
if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
324-
improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
315+
316+
// Start check at a fixed offset to allow for a few mismatches.
317+
// For this compression level 2 yields the best results.
318+
// We cannot do this if we have already indexed this position.
319+
const skipBeginning = 2
320+
if best.s > s-skipBeginning {
321+
// See if we can find a better match by checking where the current best ends.
322+
// Use that offset to see if we can find a better full match.
323+
if sAt := best.s + best.length; sAt < sLimit {
324+
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
325+
candidateEnd := e.longTable[nextHashL]
326+
327+
if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
328+
improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
329+
if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
330+
improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
331+
}
325332
}
326333
}
327334
}
167 KB
Binary file not shown.

0 commit comments

Comments
 (0)