Skip to content

inflate: Read more bits when decoding #232

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions flate/deflate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,24 @@ type reverseBitsTest struct {
}

var deflateTests = []*deflateTest{
{[]byte{}, 0, []byte{1, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{}, 0, []byte{0x3, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},

{[]byte{0x11}, 0, []byte{0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255}},
{[]byte{0x11, 0x12}, 0, []byte{0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255}},
{[]byte{0x11}, 0, []byte{0x0, 0x1, 0x0, 0xfe, 0xff, 0x11, 0x3, 0x0}},
{[]byte{0x11, 0x12}, 0, []byte{0x0, 0x2, 0x0, 0xfd, 0xff, 0x11, 0x12, 0x3, 0x0}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0,
[]byte{0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255},
[]byte{0x0, 0x8, 0x0, 0xf7, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x3, 0x0},
},
{[]byte{}, 1, []byte{1, 0, 0, 255, 255}},
{[]byte{0x11}, BestCompression, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x12}, BestCompression, []byte{18, 20, 2, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, BestCompression, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}},
{[]byte{}, 9, []byte{1, 0, 0, 255, 255}},
{[]byte{0x11}, 9, []byte{18, 4, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x12}, 9, []byte{18, 20, 2, 4, 0, 0, 255, 255}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}},
{[]byte{}, 1, []byte{0x3, 0x0}},
{[]byte{0x11}, BestCompression, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11, 0x12}, BestCompression, []byte{0x12, 0x14, 0x2, 0xc, 0x0}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, BestCompression, []byte{0x12, 0x84, 0x2, 0xc0, 0x0}},
{[]byte{}, 9, []byte{0x3, 0x0}},
{[]byte{0x11}, 9, []byte{0x12, 0x4, 0xc, 0x0}},
{[]byte{0x11, 0x12}, 9, []byte{0x12, 0x14, 0x2, 0xc, 0x0}},
{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{0x12, 0x84, 0x2, 0xc0, 0x0}},
}

var deflateInflateTests = []*deflateInflateTest{
Expand Down Expand Up @@ -110,7 +110,7 @@ func TestBulkHash4(t *testing.T) {
}

func TestDeflate(t *testing.T) {
for _, h := range deflateTests {
for i, h := range deflateTests {
var buf bytes.Buffer
w, err := NewWriter(&buf, h.level)
if err != nil {
Expand All @@ -120,7 +120,7 @@ func TestDeflate(t *testing.T) {
w.Write(h.in)
w.Close()
if !bytes.Equal(buf.Bytes(), h.out) {
t.Errorf("Deflate(%d, %x) = \n%#v, want \n%#v", h.level, h.in, buf.Bytes(), h.out)
t.Errorf("%d: Deflate(%d, %x) = \n%#v, want \n%#v", i, h.level, h.in, buf.Bytes(), h.out)
}
}
}
Expand Down
13 changes: 13 additions & 0 deletions flate/huffman_bit_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,9 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n
}
}

// writeStoredHeader will write a stored header.
// If the stored block is only used for EOF,
// it is replaced with a fixed huffman block.
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
if w.err != nil {
return
Expand All @@ -493,6 +496,16 @@ func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) {
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
}

// To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes.
if length == 0 && isEof {
w.writeFixedHeader(isEof)
// EOB: 7 bits, value: 0
w.writeBits(0, 7)
w.flush()
return
}

var flag int32
if isEof {
flag = 1
Expand Down
4 changes: 2 additions & 2 deletions flate/huffman_code.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ func generateFixedOffsetEncoding() *huffmanEncoder {
return h
}

var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding()
var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding()
var fixedLiteralEncoding = generateFixedLiteralEncoding()
var fixedOffsetEncoding = generateFixedOffsetEncoding()

func (h *huffmanEncoder) bitLength(freq []uint16) int {
var total int
Expand Down
50 changes: 34 additions & 16 deletions flate/inflate.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ const (
)

type huffmanDecoder struct {
min int // the minimum code length
maxRead int // the maximum number of bits we can read and not overread
chunks *[huffmanNumChunks]uint16 // chunks as described above
links [][]uint16 // overflow links
linkMask uint32 // mask the width of the link table
Expand All @@ -126,12 +126,12 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
}
if h.min != 0 {
if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}

// Count number of codes of each length,
// compute min and max length.
// compute maxRead and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range lengths {
Expand Down Expand Up @@ -178,7 +178,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
return false
}

h.min = min
h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
Expand Down Expand Up @@ -543,12 +543,18 @@ func (f *decompressor) readHuffman() error {
return CorruptInputError(f.roffset)
}

// As an optimization, we can initialize the min bits to read at a time
// As an optimization, we can initialize the maxRead bits to read at a time
// for the HLIT tree to the length of the EOB marker since we know that
// every block must terminate with one. This preserves the property that
// we never read any extra bytes after the end of the DEFLATE stream.
if f.h1.min < f.bits[endBlockMarker] {
f.h1.min = f.bits[endBlockMarker]
if f.h1.maxRead < f.bits[endBlockMarker] {
f.h1.maxRead = f.bits[endBlockMarker]
}
if !f.final {
// If not the final block, the smallest block possible is
// a predefined table, BTYPE=01, with a single EOB marker.
// This will take up 3 + 7 bits.
f.h1.maxRead += 10
}

return nil
Expand Down Expand Up @@ -726,21 +732,33 @@ copyHistory:
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
left := (f.nb) & 7
f.nb -= left
f.b >>= left

offBytes := f.nb >> 3
// Unfilled values will be overwritten.
f.buf[0] = uint8(f.b)
f.buf[1] = uint8(f.b >> 8)
f.buf[2] = uint8(f.b >> 16)
f.buf[3] = uint8(f.b >> 24)

f.roffset += int64(offBytes)
f.nb, f.b = 0, 0

// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
f.roffset += int64(nr)
if err != nil {
f.err = noEOF(err)
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
if nn != ^n {
if debugDecode {
fmt.Println("uint16(nn) != uint16(^n)", nn, ^n)
ncomp := ^n
fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
}
f.err = CorruptInputError(f.roffset)
return
Expand All @@ -752,7 +770,7 @@ func (f *decompressor) dataBlock() {
return
}

f.copyLen = n
f.copyLen = int(n)
f.copyData()
}

Expand Down Expand Up @@ -816,7 +834,7 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
// with single element, huffSym must error on these two edge cases. In both
// cases, the chunks slice will be 0 for the invalid sequence, leading it
// satisfy the n == 0 check below.
n := uint(h.min)
n := uint(h.maxRead)
// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
// but is smart enough to keep local variables in registers, so use nb and b,
// inline call to moreBits and reassign b,nb back to f on return.
Expand Down