6
6
// Reader and Writer support the classic LZMA format. Reader2 and
// Writer2 support the decoding and encoding of LZMA2 streams.
//
// The package is written completely in Go and does not rely on any external
// library.
11
11
package lzma
12
12
13
13
import (
14
14
"errors"
15
+ "fmt"
15
16
"io"
16
17
)
17
18
18
19
// ReaderConfig stores the parameters for the reader of the classic LZMA
// format.
type ReaderConfig struct {
	// DictCap sets an upper limit for the dictionary size stated in the
	// header of a .lzma file (since v0.5.14). This helps to mitigate
	// problems with mangled headers. A zero value is replaced by the
	// default limit when the configuration is filled.
	DictCap int
}
23
27
24
28
// fill converts the zero values of the configuration to the default values.
25
29
func (c * ReaderConfig ) fill () {
26
30
if c .DictCap == 0 {
27
- c .DictCap = 8 * 1024 * 1024
31
+ // set an upper limit of 2 GB for dictionary capacity to address
32
+ // the zero prefix security issue.
33
+ c .DictCap = 1 << 31
34
+ // original: c.DictCap = 8 * 1024 * 1024
28
35
}
29
36
}
30
37
@@ -39,10 +46,33 @@ func (c *ReaderConfig) Verify() error {
39
46
}
40
47
41
48
// Reader provides a reader for LZMA files or streams.
49
+ //
50
+ // # Security concerns
51
+ //
52
+ // Note that LZMA format doesn't support a magic marker in the header. So
53
+ // [NewReader] cannot determine whether it reads the actual header. For instance
54
+ // the LZMA stream might have a zero byte in front of the reader, leading to
55
+ // larger dictionary sizes and file sizes. The code will detect later that there
56
+ // are problems with the stream, but the dictionary has already been allocated
57
+ // and this might consume a lot of memory.
58
+ //
59
+ // Version 0.5.14 introduces built-in mitigations:
60
+ //
61
+ // - The [ReaderConfig] DictCap field is now interpreted as a limit for the
62
+ // dictionary size.
63
+ // - The default is 2 Gigabytes (2^31 bytes).
64
+ // - Users can check with the [Reader.Header] method what the actual values are in
65
+ // their LZMA files and set a smaller limit using [ReaderConfig].
66
+ // - The dictionary size doesn't exceed the larger of the file size and
67
+ // the minimum dictionary size. This is another measure to prevent huge
68
+ // memory allocations for the dictionary.
69
+ // - The code supports stream sizes only up to a pebibyte (1024^5).
42
70
type Reader struct {
43
- lzma io.Reader
44
- h header
45
- d * decoder
71
+ lzma io.Reader
72
+ header Header
73
+ // headerOrig stores the original header read from the stream.
74
+ headerOrig Header
75
+ d * decoder
46
76
}
47
77
48
78
// NewReader creates a new reader for an LZMA stream using the classic
@@ -51,8 +81,37 @@ func NewReader(lzma io.Reader) (r *Reader, err error) {
51
81
return ReaderConfig {}.NewReader (lzma )
52
82
}
53
83
84
// ErrDictSize reports an error concerning the dictionary size.
type ErrDictSize struct {
	// ConfigDictCap is the configured dictionary capacity limit.
	ConfigDictCap int
	// HeaderDictSize is the dictionary size taken from the stream header.
	HeaderDictSize uint32
	// Message is the text returned by the Error method.
	Message string
}
90
+
91
+ // Error returns the error message.
92
+ func (e * ErrDictSize ) Error () string {
93
+ return e .Message
94
+ }
95
+
96
+ func newErrDictSize (messageformat string ,
97
+ configDictCap int , headerDictSize uint32 ,
98
+ args ... interface {}) * ErrDictSize {
99
+ newArgs := make ([]interface {}, len (args )+ 2 )
100
+ newArgs [0 ] = configDictCap
101
+ newArgs [1 ] = headerDictSize
102
+ copy (newArgs [2 :], args )
103
+ return & ErrDictSize {
104
+ ConfigDictCap : configDictCap ,
105
+ HeaderDictSize : headerDictSize ,
106
+ Message : fmt .Sprintf (messageformat , newArgs ... ),
107
+ }
108
+ }
109
+
110
// maxStreamSize is the largest stream size accepted from a header. We
// support only files not larger than 1 << 50 bytes (a pebibyte, 1024^5).
const maxStreamSize = 1 << 50
112
+
54
113
// NewReader creates a new reader for an LZMA stream in the classic
55
- // format. The function reads and verifies the the header of the LZMA
114
+ // format. The function reads and verifies the header of the LZMA
56
115
// stream.
57
116
func (c ReaderConfig ) NewReader (lzma io.Reader ) (r * Reader , err error ) {
58
117
if err = c .Verify (); err != nil {
@@ -66,29 +125,63 @@ func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
66
125
return nil , err
67
126
}
68
127
r = & Reader {lzma : lzma }
69
- if err = r .h .unmarshalBinary (data ); err != nil {
128
+ if err = r .header .unmarshalBinary (data ); err != nil {
70
129
return nil , err
71
130
}
72
- if r .h .dictCap < MinDictCap {
73
- r .h .dictCap = MinDictCap
131
+ r .headerOrig = r .header
132
+ dictSize := int64 (r .header .DictSize )
133
+ if int64 (c .DictCap ) < dictSize {
134
+ return nil , newErrDictSize (
135
+ "lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d" ,
136
+ c .DictCap , uint32 (dictSize ),
137
+ )
138
+ }
139
+ if dictSize < MinDictCap {
140
+ dictSize = MinDictCap
141
+ }
142
+ // original code: disabled this because there is no point in increasing
143
+ // the dictionary above what is stated in the file.
144
+ /*
145
+ if int64(c.DictCap) > int64(dictSize) {
146
+ dictSize = int64(c.DictCap)
147
+ }
148
+ */
149
+ size := r .header .Size
150
+ if size >= 0 && size < dictSize {
151
+ dictSize = size
74
152
}
75
- dictCap := r .h .dictCap
76
- if c .DictCap > dictCap {
77
- dictCap = c .DictCap
153
+ // Protect against modified or malicious headers.
154
+ if size > maxStreamSize {
155
+ return nil , fmt .Errorf (
156
+ "lzma: stream size %d exceeds a pebibyte (1024^5)" ,
157
+ size )
78
158
}
159
+ if dictSize < MinDictCap {
160
+ dictSize = MinDictCap
161
+ }
162
+
163
+ r .header .DictSize = uint32 (dictSize )
79
164
80
- state := newState (r .h . properties )
81
- dict , err := newDecoderDict (dictCap )
165
+ state := newState (r .header . Properties )
166
+ dict , err := newDecoderDict (int ( dictSize ) )
82
167
if err != nil {
83
168
return nil , err
84
169
}
85
- r .d , err = newDecoder (ByteReader (lzma ), state , dict , r .h . size )
170
+ r .d , err = newDecoder (ByteReader (lzma ), state , dict , r .header . Size )
86
171
if err != nil {
87
172
return nil , err
88
173
}
89
174
return r , nil
90
175
}
91
176
177
+ // Header returns the header as read from the LZMA stream. It is intended to
178
+ // allow the user to understand what parameters are typically provided in the
179
+ // headers of the LZMA files and set the DictCap field in [ReaderConfig]
180
+ // accordingly.
181
+ func (r * Reader ) Header () (h Header , ok bool ) {
182
+ return r .headerOrig , r .d != nil
183
+ }
184
+
92
185
// EOSMarker indicates that an EOS marker has been encountered.
93
186
func (r * Reader ) EOSMarker () bool {
94
187
return r .d .eosMarker
0 commit comments