Skip to content

Commit 72b501c

Browse files
committed
compress/lzw: add Reset method to Reader and Writer
We add a Reset method which clears any internal state of an encoder or a decoder to let it be reused again as a new Writer or Reader respectively. We also export the encoder and decoder structs, renaming them to Writer and Reader respectively, and we guarantee that the underlying types returned by the constructors will always be Reader and Writer respectively.

Benchmark results from reusing the encoder and decoder:

on cpu: Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz

name                  time/op
Decoder/1e4-8         93.6µs ± 1%
Decoder/1e-Reuse4-8   87.7µs ± 1%
Decoder/1e5-8         877µs ± 1%
Decoder/1e-Reuse5-8   860µs ± 3%
Decoder/1e6-8         8.79ms ± 1%
Decoder/1e-Reuse6-8   8.82ms ± 4%
Encoder/1e4-8         168µs ± 2%
Encoder/1e-Reuse4-8   160µs ± 1%
Encoder/1e5-8         1.64ms ± 1%
Encoder/1e-Reuse5-8   1.61ms ± 2%
Encoder/1e6-8         16.2ms ± 6%
Encoder/1e-Reuse6-8   15.8ms ± 2%

name                  speed
Decoder/1e4-8         107MB/s ± 1%
Decoder/1e-Reuse4-8   114MB/s ± 1%
Decoder/1e5-8         114MB/s ± 1%
Decoder/1e-Reuse5-8   116MB/s ± 3%
Decoder/1e6-8         114MB/s ± 1%
Decoder/1e-Reuse6-8   113MB/s ± 5%
Encoder/1e4-8         59.7MB/s ± 2%
Encoder/1e-Reuse4-8   62.4MB/s ± 1%
Encoder/1e5-8         61.1MB/s ± 1%
Encoder/1e-Reuse5-8   62.0MB/s ± 2%
Encoder/1e6-8         61.7MB/s ± 5%
Encoder/1e-Reuse6-8   63.4MB/s ± 2%

name                  alloc/op
Decoder/1e4-8         21.8kB ± 0%
Decoder/1e-Reuse4-8   50.0B ± 0%
Decoder/1e5-8         21.8kB ± 0%
Decoder/1e-Reuse5-8   70.4B ± 2%
Decoder/1e6-8         21.9kB ± 0%
Decoder/1e-Reuse6-8   271B ± 3%
Encoder/1e4-8         77.9kB ± 0%
Encoder/1e-Reuse4-8   4.17kB ± 0%
Encoder/1e5-8         77.9kB ± 0%
Encoder/1e-Reuse5-8   4.27kB ± 0%
Encoder/1e6-8         77.9kB ± 0%
Encoder/1e-Reuse6-8   5.22kB ± 0%

name                  allocs/op
Decoder/1e4-8         2.00 ± 0%
Decoder/1e-Reuse4-8   1.00 ± 0%
Decoder/1e5-8         2.00 ± 0%
Decoder/1e-Reuse5-8   1.00 ± 0%
Decoder/1e6-8         2.00 ± 0%
Decoder/1e-Reuse6-8   1.00 ± 0%
Encoder/1e4-8         3.00 ± 0%
Encoder/1e-Reuse4-8   2.00 ± 0%
Encoder/1e5-8         3.00 ± 0%
Encoder/1e-Reuse5-8   2.00 ± 0%
Encoder/1e6-8         3.00 ± 0%
Encoder/1e-Reuse6-8   2.00 ± 0%

Fixes #26535

Change-Id: Icde613fea6234a5bdce95f1e49910f5687e30b22
Reviewed-on: https://go-review.googlesource.com/c/go/+/273667
Trust: Agniva De Sarker <[email protected]> Trust: Joe Tsai <[email protected]> Reviewed-by: Joe Tsai <[email protected]>
1 parent 119d76d commit 72b501c

File tree

4 files changed

+349
-206
lines changed

4 files changed

+349
-206
lines changed

src/compress/lzw/reader.go

Lines changed: 114 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ const (
4242
flushBuffer = 1 << maxWidth
4343
)
4444

45-
// decoder is the state from which the readXxx method converts a byte
46-
// stream into a code stream.
47-
type decoder struct {
45+
// Reader is an io.Reader which can be used to read compressed data in the
46+
// LZW format.
47+
type Reader struct {
4848
r io.ByteReader
4949
bits uint32
5050
nBits uint
5151
width uint
52-
read func(*decoder) (uint16, error) // readLSB or readMSB
53-
litWidth int // width in bits of literal codes
52+
read func(*Reader) (uint16, error) // readLSB or readMSB
53+
litWidth int // width in bits of literal codes
5454
err error
5555

5656
// The first 1<<litWidth codes are literal codes.
@@ -87,148 +87,158 @@ type decoder struct {
8787
}
8888

8989
// readLSB returns the next code for "Least Significant Bits first" data.
90-
func (d *decoder) readLSB() (uint16, error) {
91-
for d.nBits < d.width {
92-
x, err := d.r.ReadByte()
90+
func (r *Reader) readLSB() (uint16, error) {
91+
for r.nBits < r.width {
92+
x, err := r.r.ReadByte()
9393
if err != nil {
9494
return 0, err
9595
}
96-
d.bits |= uint32(x) << d.nBits
97-
d.nBits += 8
96+
r.bits |= uint32(x) << r.nBits
97+
r.nBits += 8
9898
}
99-
code := uint16(d.bits & (1<<d.width - 1))
100-
d.bits >>= d.width
101-
d.nBits -= d.width
99+
code := uint16(r.bits & (1<<r.width - 1))
100+
r.bits >>= r.width
101+
r.nBits -= r.width
102102
return code, nil
103103
}
104104

105105
// readMSB returns the next code for "Most Significant Bits first" data.
106-
func (d *decoder) readMSB() (uint16, error) {
107-
for d.nBits < d.width {
108-
x, err := d.r.ReadByte()
106+
func (r *Reader) readMSB() (uint16, error) {
107+
for r.nBits < r.width {
108+
x, err := r.r.ReadByte()
109109
if err != nil {
110110
return 0, err
111111
}
112-
d.bits |= uint32(x) << (24 - d.nBits)
113-
d.nBits += 8
112+
r.bits |= uint32(x) << (24 - r.nBits)
113+
r.nBits += 8
114114
}
115-
code := uint16(d.bits >> (32 - d.width))
116-
d.bits <<= d.width
117-
d.nBits -= d.width
115+
code := uint16(r.bits >> (32 - r.width))
116+
r.bits <<= r.width
117+
r.nBits -= r.width
118118
return code, nil
119119
}
120120

121-
func (d *decoder) Read(b []byte) (int, error) {
121+
// Read implements io.Reader, reading uncompressed bytes from its underlying Reader.
122+
func (r *Reader) Read(b []byte) (int, error) {
122123
for {
123-
if len(d.toRead) > 0 {
124-
n := copy(b, d.toRead)
125-
d.toRead = d.toRead[n:]
124+
if len(r.toRead) > 0 {
125+
n := copy(b, r.toRead)
126+
r.toRead = r.toRead[n:]
126127
return n, nil
127128
}
128-
if d.err != nil {
129-
return 0, d.err
129+
if r.err != nil {
130+
return 0, r.err
130131
}
131-
d.decode()
132+
r.decode()
132133
}
133134
}
134135

135136
// decode decompresses bytes from r.r and leaves them in r.toRead.
136137
// read specifies how to decode bytes into codes.
137138
// litWidth is the width in bits of literal codes.
138-
func (d *decoder) decode() {
139+
func (r *Reader) decode() {
139140
// Loop over the code stream, converting codes into decompressed bytes.
140141
loop:
141142
for {
142-
code, err := d.read(d)
143+
code, err := r.read(r)
143144
if err != nil {
144145
if err == io.EOF {
145146
err = io.ErrUnexpectedEOF
146147
}
147-
d.err = err
148+
r.err = err
148149
break
149150
}
150151
switch {
151-
case code < d.clear:
152+
case code < r.clear:
152153
// We have a literal code.
153-
d.output[d.o] = uint8(code)
154-
d.o++
155-
if d.last != decoderInvalidCode {
154+
r.output[r.o] = uint8(code)
155+
r.o++
156+
if r.last != decoderInvalidCode {
156157
// Save what the hi code expands to.
157-
d.suffix[d.hi] = uint8(code)
158-
d.prefix[d.hi] = d.last
158+
r.suffix[r.hi] = uint8(code)
159+
r.prefix[r.hi] = r.last
159160
}
160-
case code == d.clear:
161-
d.width = 1 + uint(d.litWidth)
162-
d.hi = d.eof
163-
d.overflow = 1 << d.width
164-
d.last = decoderInvalidCode
161+
case code == r.clear:
162+
r.width = 1 + uint(r.litWidth)
163+
r.hi = r.eof
164+
r.overflow = 1 << r.width
165+
r.last = decoderInvalidCode
165166
continue
166-
case code == d.eof:
167-
d.err = io.EOF
167+
case code == r.eof:
168+
r.err = io.EOF
168169
break loop
169-
case code <= d.hi:
170-
c, i := code, len(d.output)-1
171-
if code == d.hi && d.last != decoderInvalidCode {
170+
case code <= r.hi:
171+
c, i := code, len(r.output)-1
172+
if code == r.hi && r.last != decoderInvalidCode {
172173
// code == hi is a special case which expands to the last expansion
173174
// followed by the head of the last expansion. To find the head, we walk
174175
// the prefix chain until we find a literal code.
175-
c = d.last
176-
for c >= d.clear {
177-
c = d.prefix[c]
176+
c = r.last
177+
for c >= r.clear {
178+
c = r.prefix[c]
178179
}
179-
d.output[i] = uint8(c)
180+
r.output[i] = uint8(c)
180181
i--
181-
c = d.last
182+
c = r.last
182183
}
183184
// Copy the suffix chain into the tail of output and then move it down to offset r.o.
184-
for c >= d.clear {
185-
d.output[i] = d.suffix[c]
185+
for c >= r.clear {
186+
r.output[i] = r.suffix[c]
186187
i--
187-
c = d.prefix[c]
188+
c = r.prefix[c]
188189
}
189-
d.output[i] = uint8(c)
190-
d.o += copy(d.output[d.o:], d.output[i:])
191-
if d.last != decoderInvalidCode {
190+
r.output[i] = uint8(c)
191+
r.o += copy(r.output[r.o:], r.output[i:])
192+
if r.last != decoderInvalidCode {
192193
// Save what the hi code expands to.
193-
d.suffix[d.hi] = uint8(c)
194-
d.prefix[d.hi] = d.last
194+
r.suffix[r.hi] = uint8(c)
195+
r.prefix[r.hi] = r.last
195196
}
196197
default:
197-
d.err = errors.New("lzw: invalid code")
198+
r.err = errors.New("lzw: invalid code")
198199
break loop
199200
}
200-
d.last, d.hi = code, d.hi+1
201-
if d.hi >= d.overflow {
202-
if d.hi > d.overflow {
201+
r.last, r.hi = code, r.hi+1
202+
if r.hi >= r.overflow {
203+
if r.hi > r.overflow {
203204
panic("unreachable")
204205
}
205-
if d.width == maxWidth {
206-
d.last = decoderInvalidCode
206+
if r.width == maxWidth {
207+
r.last = decoderInvalidCode
207208
// Undo the r.hi increment a few lines above, so that (1) we maintain
208209
// the invariant that r.hi < r.overflow, and (2) r.hi does not
209210
// eventually overflow a uint16.
210-
d.hi--
211+
r.hi--
211212
} else {
212-
d.width++
213-
d.overflow = 1 << d.width
213+
r.width++
214+
r.overflow = 1 << r.width
214215
}
215216
}
216-
if d.o >= flushBuffer {
217+
if r.o >= flushBuffer {
217218
break
218219
}
219220
}
220221
// Flush pending output.
221-
d.toRead = d.output[:d.o]
222-
d.o = 0
222+
r.toRead = r.output[:r.o]
223+
r.o = 0
223224
}
224225

225226
var errClosed = errors.New("lzw: reader/writer is closed")
226227

227-
func (d *decoder) Close() error {
228-
d.err = errClosed // in case any Reads come along
228+
// Close closes the Reader and returns an error for any future read operation.
229+
// It does not close the underlying io.Reader.
230+
func (r *Reader) Close() error {
231+
r.err = errClosed // in case any Reads come along
229232
return nil
230233
}
231234

235+
// Reset clears the Reader's state and allows it to be reused again
236+
// as a new Reader.
237+
func (r *Reader) Reset(src io.Reader, order Order, litWidth int) {
238+
*r = Reader{}
239+
r.init(src, order, litWidth)
240+
}
241+
232242
// NewReader creates a new io.ReadCloser.
233243
// Reads from the returned io.ReadCloser read and decompress data from r.
234244
// If r does not also implement io.ByteReader,
@@ -238,32 +248,43 @@ func (d *decoder) Close() error {
238248
// The number of bits to use for literal codes, litWidth, must be in the
239249
// range [2,8] and is typically 8. It must equal the litWidth
240250
// used during compression.
251+
//
252+
// It is guaranteed that the underlying type of the returned io.ReadCloser
253+
// is a *Reader.
241254
func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser {
242-
d := new(decoder)
255+
return newReader(r, order, litWidth)
256+
}
257+
258+
func newReader(src io.Reader, order Order, litWidth int) *Reader {
259+
r := new(Reader)
260+
r.init(src, order, litWidth)
261+
return r
262+
}
263+
264+
func (r *Reader) init(src io.Reader, order Order, litWidth int) {
243265
switch order {
244266
case LSB:
245-
d.read = (*decoder).readLSB
267+
r.read = (*Reader).readLSB
246268
case MSB:
247-
d.read = (*decoder).readMSB
269+
r.read = (*Reader).readMSB
248270
default:
249-
d.err = errors.New("lzw: unknown order")
250-
return d
271+
r.err = errors.New("lzw: unknown order")
272+
return
251273
}
252274
if litWidth < 2 || 8 < litWidth {
253-
d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
254-
return d
275+
r.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
276+
return
255277
}
256-
if br, ok := r.(io.ByteReader); ok {
257-
d.r = br
258-
} else {
259-
d.r = bufio.NewReader(r)
260-
}
261-
d.litWidth = litWidth
262-
d.width = 1 + uint(litWidth)
263-
d.clear = uint16(1) << uint(litWidth)
264-
d.eof, d.hi = d.clear+1, d.clear+1
265-
d.overflow = uint16(1) << d.width
266-
d.last = decoderInvalidCode
267278

268-
return d
279+
br, ok := src.(io.ByteReader)
280+
if !ok && src != nil {
281+
br = bufio.NewReader(src)
282+
}
283+
r.r = br
284+
r.litWidth = litWidth
285+
r.width = 1 + uint(litWidth)
286+
r.clear = uint16(1) << uint(litWidth)
287+
r.eof, r.hi = r.clear+1, r.clear+1
288+
r.overflow = uint16(1) << r.width
289+
r.last = decoderInvalidCode
269290
}

0 commit comments

Comments
 (0)