Skip to content

Commit 88ddf1d

Browse files
committed
Address Security Issue GHSA-jc7w-c686-c4v9
This commit addresses security issue GHSA-jc7w-c686-c4v9. The mitigating measures are described for the Reader type and I added a TestZeroPrefixIssue function to test the mitigations. // # Security concerns // // Note that LZMA format doesn't support a magic marker in the header. So // [NewReader] cannot determine whether it reads the actual header. For instance // the LZMA stream might have a zero byte in front of the reader, leading to // larger dictionary sizes and file sizes. The code will detect later that there // are problems with the stream, but the dictionary has already been allocated // and this might consume a lot of memory. // // Version 0.5.14 introduces built-in mitigations: // // - The [ReaderConfig] DictCap field is now interpreted as a limit for the // dictionary size. // - The default is 2 Gigabytes (2^31 bytes). // - Users can check with the [Reader.Header] method what the actual values are in // their LZMA files and set a smaller limit using [ReaderConfig]. // - The dictionary size doesn't exceed the larger of the file size and // the minimum dictionary size. This is another measure to prevent huge // memory allocations for the dictionary. // - The code supports stream sizes only up to a pebibyte (1024^5).
1 parent c8314b8 commit 88ddf1d

File tree

7 files changed

+217
-71
lines changed

7 files changed

+217
-71
lines changed

TODO.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
# TODO list
22

3-
## Release v0.5.x
4-
5-
1. Support check flag in gxz command.
3+
## Release v0.5.14
4+
5+
* If the DictionarySize is larger than the UncompressedSize set it to
6+
UncompressedSize
7+
* make a Header() (h Header, ok bool) function so the user can implement its own
8+
policy
9+
* Add documentation to Reader to explain the situation
10+
* Add a TODO for the rewrite version
611

712
## Release v0.6
813

lzma/header.go

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -60,36 +60,36 @@ const noHeaderSize uint64 = 1<<64 - 1
6060
// HeaderLen provides the length of the LZMA file header.
6161
const HeaderLen = 13
6262

63-
// header represents the header of an LZMA file.
64-
type header struct {
65-
properties Properties
66-
dictCap int
67-
// uncompressed size; negative value if no size is given
68-
size int64
63+
// Header represents the Header of an LZMA file.
64+
type Header struct {
65+
Properties Properties
66+
DictSize uint32
67+
// uncompressed Size; negative value if no Size is given
68+
Size int64
6969
}
7070

7171
// marshalBinary marshals the header.
72-
func (h *header) marshalBinary() (data []byte, err error) {
73-
if err = h.properties.verify(); err != nil {
72+
func (h *Header) marshalBinary() (data []byte, err error) {
73+
if err = h.Properties.verify(); err != nil {
7474
return nil, err
7575
}
76-
if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) {
76+
if !(h.DictSize <= MaxDictCap) {
7777
return nil, fmt.Errorf("lzma: DictCap %d out of range",
78-
h.dictCap)
78+
h.DictSize)
7979
}
8080

8181
data = make([]byte, 13)
8282

8383
// property byte
84-
data[0] = h.properties.Code()
84+
data[0] = h.Properties.Code()
8585

8686
// dictionary capacity
87-
putUint32LE(data[1:5], uint32(h.dictCap))
87+
putUint32LE(data[1:5], uint32(h.DictSize))
8888

8989
// uncompressed size
9090
var s uint64
91-
if h.size > 0 {
92-
s = uint64(h.size)
91+
if h.Size > 0 {
92+
s = uint64(h.Size)
9393
} else {
9494
s = noHeaderSize
9595
}
@@ -99,20 +99,20 @@ func (h *header) marshalBinary() (data []byte, err error) {
9999
}
100100

101101
// unmarshalBinary unmarshals the header.
102-
func (h *header) unmarshalBinary(data []byte) error {
102+
func (h *Header) unmarshalBinary(data []byte) error {
103103
if len(data) != HeaderLen {
104104
return errors.New("lzma.unmarshalBinary: data has wrong length")
105105
}
106106

107107
// properties
108108
var err error
109-
if h.properties, err = PropertiesForCode(data[0]); err != nil {
109+
if h.Properties, err = PropertiesForCode(data[0]); err != nil {
110110
return err
111111
}
112112

113113
// dictionary capacity
114-
h.dictCap = int(uint32LE(data[1:]))
115-
if h.dictCap < 0 {
114+
h.DictSize = uint32LE(data[1:])
115+
if int(h.DictSize) < 0 {
116116
return errors.New(
117117
"LZMA header: dictionary capacity exceeds maximum " +
118118
"integer")
@@ -121,10 +121,10 @@ func (h *header) unmarshalBinary(data []byte) error {
121121
// uncompressed size
122122
s := uint64LE(data[5:])
123123
if s == noHeaderSize {
124-
h.size = -1
124+
h.Size = -1
125125
} else {
126-
h.size = int64(s)
127-
if h.size < 0 {
126+
h.Size = int64(s)
127+
if h.Size < 0 {
128128
return errors.New(
129129
"LZMA header: uncompressed size " +
130130
"out of int64 range")
@@ -134,9 +134,9 @@ func (h *header) unmarshalBinary(data []byte) error {
134134
return nil
135135
}
136136

137-
// validDictCap checks whether the dictionary capacity is correct. This
137+
// validDictSize checks whether the dictionary capacity is correct. This
138138
// is used to weed out wrong file headers.
139-
func validDictCap(dictcap int) bool {
139+
func validDictSize(dictcap int) bool {
140140
if int64(dictcap) == MaxDictCap {
141141
return true
142142
}
@@ -155,13 +155,16 @@ func validDictCap(dictcap int) bool {
155155
// dictionary sizes of 2^n or 2^n+2^(n-1) with n >= 10 or 2^32-1. If
156156
// there is an explicit size it must not exceed 256 GiB. The length of
157157
// the data argument must be HeaderLen.
158+
//
159+
// This function should be disregarded because there is no guarantee that LZMA
160+
// files follow the constraints.
158161
func ValidHeader(data []byte) bool {
159-
var h header
162+
var h Header
160163
if err := h.unmarshalBinary(data); err != nil {
161164
return false
162165
}
163-
if !validDictCap(h.dictCap) {
166+
if !validDictSize(int(h.DictSize)) {
164167
return false
165168
}
166-
return h.size < 0 || h.size <= 1<<38
169+
return h.Size < 0 || h.Size <= 1<<38
167170
}

lzma/header2_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ func TestHeaderLen(t *testing.T) {
7878
}
7979

8080
func chunkHeaderSamples(t *testing.T) []chunkHeader {
81+
_ = t
8182
props := Properties{LC: 3, LP: 0, PB: 2}
8283
headers := make([]chunkHeader, 0, 12)
8384
for c := cEOS; c <= cLRND; c++ {

lzma/header_test.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,18 @@ package lzma
77
import "testing"
88

99
func TestHeaderMarshalling(t *testing.T) {
10-
tests := []header{
11-
{properties: Properties{3, 0, 2}, dictCap: 8 * 1024 * 1024,
12-
size: -1},
13-
{properties: Properties{4, 3, 3}, dictCap: 4096,
14-
size: 10},
10+
tests := []Header{
11+
{Properties: Properties{3, 0, 2}, DictSize: 8 * 1024 * 1024,
12+
Size: -1},
13+
{Properties: Properties{4, 3, 3}, DictSize: 4096,
14+
Size: 10},
1515
}
1616
for _, h := range tests {
1717
data, err := h.marshalBinary()
1818
if err != nil {
1919
t.Fatalf("marshalBinary error %s", err)
2020
}
21-
var g header
21+
var g Header
2222
if err = g.unmarshalBinary(data); err != nil {
2323
t.Fatalf("unmarshalBinary error %s", err)
2424
}
@@ -29,11 +29,11 @@ func TestHeaderMarshalling(t *testing.T) {
2929
}
3030

3131
func TestValidHeader(t *testing.T) {
32-
tests := []header{
33-
{properties: Properties{3, 0, 2}, dictCap: 8 * 1024 * 1024,
34-
size: -1},
35-
{properties: Properties{4, 3, 3}, dictCap: 4096,
36-
size: 10},
32+
tests := []Header{
33+
{Properties: Properties{3, 0, 2}, DictSize: 8 * 1024 * 1024,
34+
Size: -1},
35+
{Properties: Properties{4, 3, 3}, DictSize: 4096,
36+
Size: 10},
3737
}
3838
for _, h := range tests {
3939
data, err := h.marshalBinary()

lzma/reader.go

Lines changed: 108 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,32 @@
66
// Reader and Writer support the classic LZMA format. Reader2 and
77
// Writer2 support the decoding and encoding of LZMA2 streams.
88
//
9-
// The package is written completely in Go and doesn't rely on any external
9+
// The package is written completely in Go and does not rely on any external
1010
// library.
1111
package lzma
1212

1313
import (
1414
"errors"
15+
"fmt"
1516
"io"
1617
)
1718

1819
// ReaderConfig stores the parameters for the reader of the classic LZMA
1920
// format.
2021
type ReaderConfig struct {
22+
// Since v0.5.14 this parameter sets an upper limit for a .lzma file's
23+
// dictionary size. This helps to mitigate problems with mangled
24+
// headers.
2125
DictCap int
2226
}
2327

2428
// fill converts the zero values of the configuration to the default values.
2529
func (c *ReaderConfig) fill() {
2630
if c.DictCap == 0 {
27-
c.DictCap = 8 * 1024 * 1024
31+
// set an upper limit of 2 GB for dictionary capacity to address
32+
// the zero prefix security issue.
33+
c.DictCap = 1 << 31
34+
// original: c.DictCap = 8 * 1024 * 1024
2835
}
2936
}
3037

@@ -39,10 +46,33 @@ func (c *ReaderConfig) Verify() error {
3946
}
4047

4148
// Reader provides a reader for LZMA files or streams.
49+
//
50+
// # Security concerns
51+
//
52+
// Note that LZMA format doesn't support a magic marker in the header. So
53+
// [NewReader] cannot determine whether it reads the actual header. For instance
54+
// the LZMA stream might have a zero byte in front of the reader, leading to
55+
// larger dictionary sizes and file sizes. The code will detect later that there
56+
// are problems with the stream, but the dictionary has already been allocated
57+
// and this might consume a lot of memory.
58+
//
59+
// Version 0.5.14 introduces built-in mitigations:
60+
//
61+
// - The [ReaderConfig] DictCap field is now interpreted as a limit for the
62+
// dictionary size.
63+
// - The default is 2 Gigabytes (2^31 bytes).
64+
// - Users can check with the [Reader.Header] method what the actual values are in
65+
// their LZMA files and set a smaller limit using [ReaderConfig].
66+
// - The dictionary size doesn't exceed the larger of the file size and
67+
// the minimum dictionary size. This is another measure to prevent huge
68+
// memory allocations for the dictionary.
69+
// - The code supports stream sizes only up to a pebibyte (1024^5).
4270
type Reader struct {
43-
lzma io.Reader
44-
h header
45-
d *decoder
71+
lzma io.Reader
72+
header Header
73+
// headerOrig stores the original header read from the stream.
74+
headerOrig Header
75+
d *decoder
4676
}
4777

4878
// NewReader creates a new reader for an LZMA stream using the classic
@@ -51,8 +81,37 @@ func NewReader(lzma io.Reader) (r *Reader, err error) {
5181
return ReaderConfig{}.NewReader(lzma)
5282
}
5383

84+
// ErrDictSize reports about an error of the dictionary size.
85+
type ErrDictSize struct {
86+
ConfigDictCap int
87+
HeaderDictSize uint32
88+
Message string
89+
}
90+
91+
// Error returns the error message.
92+
func (e *ErrDictSize) Error() string {
93+
return e.Message
94+
}
95+
96+
func newErrDictSize(messageformat string,
97+
configDictCap int, headerDictSize uint32,
98+
args ...interface{}) *ErrDictSize {
99+
newArgs := make([]interface{}, len(args)+2)
100+
newArgs[0] = configDictCap
101+
newArgs[1] = headerDictSize
102+
copy(newArgs[2:], args)
103+
return &ErrDictSize{
104+
ConfigDictCap: configDictCap,
105+
HeaderDictSize: headerDictSize,
106+
Message: fmt.Sprintf(messageformat, newArgs...),
107+
}
108+
}
109+
110+
// We support only files not larger than 1 << 50 bytes (a pebibyte, 1024^5).
111+
const maxStreamSize = 1 << 50
112+
54113
// NewReader creates a new reader for an LZMA stream in the classic
55-
// format. The function reads and verifies the the header of the LZMA
114+
// format. The function reads and verifies the header of the LZMA
56115
// stream.
57116
func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
58117
if err = c.Verify(); err != nil {
@@ -66,29 +125,63 @@ func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
66125
return nil, err
67126
}
68127
r = &Reader{lzma: lzma}
69-
if err = r.h.unmarshalBinary(data); err != nil {
128+
if err = r.header.unmarshalBinary(data); err != nil {
70129
return nil, err
71130
}
72-
if r.h.dictCap < MinDictCap {
73-
r.h.dictCap = MinDictCap
131+
r.headerOrig = r.header
132+
dictSize := int64(r.header.DictSize)
133+
if int64(c.DictCap) < dictSize {
134+
return nil, newErrDictSize(
135+
"lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d",
136+
c.DictCap, uint32(dictSize),
137+
)
138+
}
139+
if dictSize < MinDictCap {
140+
dictSize = MinDictCap
141+
}
142+
// original code: disabled this because there is no point in increasing
143+
// the dictionary above what is stated in the file.
144+
/*
145+
if int64(c.DictCap) > int64(dictSize) {
146+
dictSize = int64(c.DictCap)
147+
}
148+
*/
149+
size := r.header.Size
150+
if size >= 0 && size < dictSize {
151+
dictSize = size
74152
}
75-
dictCap := r.h.dictCap
76-
if c.DictCap > dictCap {
77-
dictCap = c.DictCap
153+
// Protect against modified or malicious headers.
154+
if size > maxStreamSize {
155+
return nil, fmt.Errorf(
156+
"lzma: stream size %d exceeds a pebibyte (1024^5)",
157+
size)
78158
}
159+
if dictSize < MinDictCap {
160+
dictSize = MinDictCap
161+
}
162+
163+
r.header.DictSize = uint32(dictSize)
79164

80-
state := newState(r.h.properties)
81-
dict, err := newDecoderDict(dictCap)
165+
state := newState(r.header.Properties)
166+
dict, err := newDecoderDict(int(dictSize))
82167
if err != nil {
83168
return nil, err
84169
}
85-
r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size)
170+
r.d, err = newDecoder(ByteReader(lzma), state, dict, r.header.Size)
86171
if err != nil {
87172
return nil, err
88173
}
89174
return r, nil
90175
}
91176

177+
// Header returns the header as read from the LZMA stream. It is intended to
178+
// allow the user to understand what parameters are typically provided in the
179+
// headers of the LZMA files and set the DictCap field in [ReaderConfig]
180+
// accordingly.
181+
func (r *Reader) Header() (h Header, ok bool) {
182+
return r.headerOrig, r.d != nil
183+
}
184+
92185
// EOSMarker indicates that an EOS marker has been encountered.
93186
func (r *Reader) EOSMarker() bool {
94187
return r.d.eosMarker

0 commit comments

Comments
 (0)