Skip to content

Commit f93aa8a

Browse files
Preserve trailing extra data when updating ZIP files (#113306)
* Reimplement PR 112032 * Permit trailing malformed extra field data. This data is generated by zipalign. It needs to be accounted for and preserved. * Update test, adding test cases Prove functionality where there are 3, 4, 7, 8 and 15 bytes of padding. This covers zero, one and multiple well-formed extra fields, with and without subsequent trailing data. --------- Co-authored-by: Carlos Sánchez López <[email protected]>
1 parent 1b090a9 commit f93aa8a

File tree

4 files changed

+507
-38
lines changed

4 files changed

+507
-38
lines changed

src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,13 +733,17 @@ private void WriteFile()
733733
if (entry.OffsetOfLocalHeader >= startingOffset)
734734
{
735735
// If the pending data to write is fixed-length metadata in the header, there's no need to load the compressed file bits.
736+
// We always need to load the local file header's metadata though - at this point, this entry will be written out and we
737+
// want to make sure that we preserve that metadata.
736738
if ((entry.Changes & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0)
737739
{
738740
completeRewriteStartingOffset = Math.Min(completeRewriteStartingOffset, entry.OffsetOfLocalHeader);
739741
}
742+
743+
entry.LoadLocalHeaderExtraFieldIfNeeded();
740744
if (entry.OffsetOfLocalHeader >= completeRewriteStartingOffset)
741745
{
742-
entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded();
746+
entry.LoadCompressedBytesIfNeeded();
743747
}
744748

745749
entriesToWrite.Add(entry);

src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ public partial class ZipArchiveEntry
4343
private byte[] _storedEntryNameBytes;
4444
// only apply to update mode
4545
private List<ZipGenericExtraField>? _cdUnknownExtraFields;
46+
private byte[]? _cdTrailingExtraFieldData;
4647
private List<ZipGenericExtraField>? _lhUnknownExtraFields;
48+
private byte[]? _lhTrailingExtraFieldData;
4749
private byte[] _fileComment;
4850
private readonly CompressionLevel _compressionLevel;
4951

@@ -53,6 +55,11 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)
5355
_archive = archive;
5456

5557
_originallyInArchive = true;
58+
// It's possible for the CompressionMethod setter and DetectEntryNameVersion to update this, even without any explicit
59+
// changes. This can occur if a ZipArchive instance runs in Update mode and opens a stream with invalid data. In such
60+
// a situation, both the local file header and the central directory header will be rewritten (to prevent the headers
61+
// from falling out of sync when the central directory header is rewritten.)
62+
Changes = ZipArchive.ChangeState.Unchanged;
5663

5764
_diskNumberStart = cd.DiskNumberStart;
5865
_versionMadeByPlatform = (ZipVersionMadeByPlatform)cd.VersionMadeByCompatibility;
@@ -84,12 +91,11 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)
8491
_lhUnknownExtraFields = null;
8592
// the cd should have this as null if we aren't in Update mode
8693
_cdUnknownExtraFields = cd.ExtraFields;
94+
_cdTrailingExtraFieldData = cd.TrailingExtraFieldData;
8795

8896
_fileComment = cd.FileComment;
8997

9098
_compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, CompressionMethod);
91-
92-
Changes = ZipArchive.ChangeState.Unchanged;
9399
}
94100

95101
// Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level.
@@ -543,8 +549,9 @@ internal void WriteCentralDirectoryFileHeader(bool forceWrite)
543549

544550

545551
// determine if we can fit zip64 extra field and original extra fields all in
552+
int currExtraFieldDataLength = ZipGenericExtraField.TotalSize(_cdUnknownExtraFields, _cdTrailingExtraFieldData?.Length ?? 0);
546553
int bigExtraFieldLength = (zip64ExtraField != null ? zip64ExtraField.TotalSize : 0)
547-
+ (_cdUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_cdUnknownExtraFields) : 0);
554+
+ currExtraFieldDataLength;
548555
ushort extraFieldLength;
549556
if (bigExtraFieldLength > ushort.MaxValue)
550557
{
@@ -561,7 +568,7 @@ internal void WriteCentralDirectoryFileHeader(bool forceWrite)
561568
long centralDirectoryHeaderLength = ZipCentralDirectoryFileHeader.FieldLocations.DynamicData
562569
+ _storedEntryNameBytes.Length
563570
+ (zip64ExtraField != null ? zip64ExtraField.TotalSize : 0)
564-
+ (_cdUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_cdUnknownExtraFields) : 0)
571+
+ currExtraFieldDataLength
565572
+ _fileComment.Length;
566573

567574
_archive.ArchiveStream.Seek(centralDirectoryHeaderLength, SeekOrigin.Current);
@@ -609,13 +616,11 @@ internal void WriteCentralDirectoryFileHeader(bool forceWrite)
609616
_archive.ArchiveStream.Write(cdStaticHeader);
610617
_archive.ArchiveStream.Write(_storedEntryNameBytes);
611618

612-
// write extra fields, and only write zip64ExtraField if we decided we need it (it's not null)
619+
// only write zip64ExtraField if we decided we need it (it's not null)
613620
zip64ExtraField?.WriteBlock(_archive.ArchiveStream);
614621

615-
if (_cdUnknownExtraFields != null)
616-
{
617-
ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream);
618-
}
622+
// write extra fields (and any malformed trailing data).
623+
ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _cdTrailingExtraFieldData ?? Array.Empty<byte>(), _archive.ArchiveStream);
619624

620625
if (_fileComment.Length > 0)
621626
{
@@ -626,7 +631,7 @@ internal void WriteCentralDirectoryFileHeader(bool forceWrite)
626631

627632
// throws exception if fails, will get called on every relevant entry before closing in update mode
628633
// can throw InvalidDataException
629-
internal void LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded()
634+
internal void LoadLocalHeaderExtraFieldIfNeeded()
630635
{
631636
// we should have made this exact call in _archive.Init through ThrowIfOpenable
632637
Debug.Assert(IsOpenable(false, true, out _));
@@ -635,8 +640,16 @@ internal void LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded()
635640
if (_originallyInArchive)
636641
{
637642
_archive.ArchiveStream.Seek(_offsetOfLocalHeader, SeekOrigin.Begin);
638-
_lhUnknownExtraFields = ZipLocalFileHeader.GetExtraFields(_archive.ArchiveStream);
643+
_lhUnknownExtraFields = ZipLocalFileHeader.GetExtraFields(_archive.ArchiveStream, out _lhTrailingExtraFieldData);
639644
}
645+
}
646+
647+
// throws exception if fails, will get called on every relevant entry before closing in update mode
648+
// can throw InvalidDataException
649+
internal void LoadCompressedBytesIfNeeded()
650+
{
651+
// we should have made this exact call in _archive.Init through ThrowIfOpenable
652+
Debug.Assert(IsOpenable(false, true, out _));
640653

641654
if (!_everOpenedForWrite && _originallyInArchive)
642655
{
@@ -979,8 +992,9 @@ private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite)
979992
_offsetOfLocalHeader = _archive.ArchiveStream.Position;
980993

981994
// calculate extra field. if zip64 stuff + original extraField aren't going to fit, dump the original extraField, because this is more important
995+
int currExtraFieldDataLength = ZipGenericExtraField.TotalSize(_lhUnknownExtraFields, _lhTrailingExtraFieldData?.Length ?? 0);
982996
int bigExtraFieldLength = (zip64ExtraField != null ? zip64ExtraField.TotalSize : 0)
983-
+ (_lhUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_lhUnknownExtraFields) : 0);
997+
+ currExtraFieldDataLength;
984998
ushort extraFieldLength;
985999
if (bigExtraFieldLength > ushort.MaxValue)
9861000
{
@@ -1003,10 +1017,7 @@ private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite)
10031017
_archive.ArchiveStream.Seek(zip64ExtraField.TotalSize, SeekOrigin.Current);
10041018
}
10051019

1006-
if (_lhUnknownExtraFields != null)
1007-
{
1008-
_archive.ArchiveStream.Seek(ZipGenericExtraField.TotalSize(_lhUnknownExtraFields), SeekOrigin.Current);
1009-
}
1020+
_archive.ArchiveStream.Seek(currExtraFieldDataLength, SeekOrigin.Current);
10101021
}
10111022
else
10121023
{
@@ -1029,8 +1040,7 @@ private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite)
10291040
// Only when handling zip64
10301041
zip64ExtraField?.WriteBlock(_archive.ArchiveStream);
10311042

1032-
if (_lhUnknownExtraFields != null)
1033-
ZipGenericExtraField.WriteAllBlocks(_lhUnknownExtraFields, _archive.ArchiveStream);
1043+
ZipGenericExtraField.WriteAllBlocks(_lhUnknownExtraFields, _lhTrailingExtraFieldData ?? Array.Empty<byte>(), _archive.ArchiveStream);
10341044
}
10351045

10361046
return zip64ExtraField != null;
@@ -1252,10 +1262,12 @@ private void VersionToExtractAtLeast(ZipVersionNeededValues value)
12521262
if (_versionToExtract < value)
12531263
{
12541264
_versionToExtract = value;
1265+
Changes |= ZipArchive.ChangeState.FixedLengthMetadata;
12551266
}
12561267
if (_versionMadeBySpecification < value)
12571268
{
12581269
_versionMadeBySpecification = value;
1270+
Changes |= ZipArchive.ChangeState.FixedLengthMetadata;
12591271
}
12601272
}
12611273

src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ public static bool TryReadBlock(ReadOnlySpan<byte> bytes, out int bytesConsumed,
4848

4949
field._tag = BinaryPrimitives.ReadUInt16LittleEndian(bytes[FieldLocations.Tag..]);
5050
field._size = BinaryPrimitives.ReadUInt16LittleEndian(bytes[FieldLocations.Size..]);
51-
bytesConsumed += SizeOfHeader;
5251

5352
// not enough byte to read the data
5453
if ((bytes.Length - SizeOfHeader) < field._size)
@@ -57,11 +56,11 @@ public static bool TryReadBlock(ReadOnlySpan<byte> bytes, out int bytesConsumed,
5756
}
5857

5958
field._data = bytes.Slice(FieldLocations.DynamicData, field._size).ToArray();
60-
bytesConsumed += field._size;
59+
bytesConsumed = field.Size + SizeOfHeader;
6160
return true;
6261
}
6362

64-
public static List<ZipGenericExtraField> ParseExtraField(ReadOnlySpan<byte> extraFieldData)
63+
public static List<ZipGenericExtraField> ParseExtraField(ReadOnlySpan<byte> extraFieldData, out ReadOnlySpan<byte> trailingExtraFieldData)
6564
{
6665
List<ZipGenericExtraField> extraFields = new List<ZipGenericExtraField>();
6766
int totalBytesConsumed = 0;
@@ -71,25 +70,43 @@ public static List<ZipGenericExtraField> ParseExtraField(ReadOnlySpan<byte> extr
7170
totalBytesConsumed += currBytesConsumed;
7271
extraFields.Add(field);
7372
}
73+
// It's possible for some ZIP files to contain extra data which isn't a well-formed extra field. zipalign does this, padding the extra data
74+
// field with zeroes to align the start of the file data to an X-byte boundary. We need to preserve this data so that the recorded "extra data length"
75+
// fields in the central directory and local file headers are valid.
76+
// We need to account for this because zipalign is part of the build process for Android applications, and failing to do this will result in
77+
// .NET for Android generating corrupted .apk files and failing to build.
78+
trailingExtraFieldData = extraFieldData[totalBytesConsumed..];
7479

7580
return extraFields;
7681
}
7782

78-
public static int TotalSize(List<ZipGenericExtraField> fields)
83+
public static int TotalSize(List<ZipGenericExtraField>? fields, int trailingDataLength)
7984
{
80-
int size = 0;
81-
foreach (ZipGenericExtraField field in fields)
85+
int size = trailingDataLength;
86+
87+
if (fields != null)
8288
{
83-
size += field.Size + SizeOfHeader; //size is only size of data
89+
foreach (ZipGenericExtraField field in fields)
90+
{
91+
size += field.Size + SizeOfHeader; //size is only size of data
92+
}
8493
}
8594
return size;
8695
}
8796

88-
public static void WriteAllBlocks(List<ZipGenericExtraField> fields, Stream stream)
97+
public static void WriteAllBlocks(List<ZipGenericExtraField>? fields, ReadOnlySpan<byte> trailingExtraFieldData, Stream stream)
8998
{
90-
foreach (ZipGenericExtraField field in fields)
99+
if (fields != null)
100+
{
101+
foreach (ZipGenericExtraField field in fields)
102+
{
103+
field.WriteBlock(stream);
104+
}
105+
}
106+
107+
if (!trailingExtraFieldData.IsEmpty)
91108
{
92-
field.WriteBlock(stream);
109+
stream.Write(trailingExtraFieldData);
93110
}
94111
}
95112
}
@@ -514,7 +531,7 @@ internal readonly partial struct ZipLocalFileHeader
514531
public static ReadOnlySpan<byte> SignatureConstantBytes => [0x50, 0x4B, 0x03, 0x04];
515532
public const int SizeOfLocalHeader = 30;
516533

517-
public static List<ZipGenericExtraField> GetExtraFields(Stream stream)
534+
public static List<ZipGenericExtraField> GetExtraFields(Stream stream, out byte[] trailingData)
518535
{
519536
// assumes that TrySkipBlock has already been called, so we don't have to validate twice
520537

@@ -538,8 +555,9 @@ public static List<ZipGenericExtraField> GetExtraFields(Stream stream)
538555
stream.Seek(filenameLength, SeekOrigin.Current);
539556
stream.ReadExactly(extraFieldBuffer);
540557

541-
result = ZipGenericExtraField.ParseExtraField(extraFieldBuffer);
558+
result = ZipGenericExtraField.ParseExtraField(extraFieldBuffer, out ReadOnlySpan<byte> trailingDataSpan);
542559
Zip64ExtraField.RemoveZip64Blocks(result);
560+
trailingData = trailingDataSpan.ToArray();
543561

544562
return result;
545563
}
@@ -627,6 +645,7 @@ internal sealed partial class ZipCentralDirectoryFileHeader
627645
public byte[] Filename = [];
628646
public byte[] FileComment = [];
629647
public List<ZipGenericExtraField>? ExtraFields;
648+
public byte[]? TrailingExtraFieldData;
630649

631650
// if saveExtraFieldsAndComments is false, FileComment and ExtraFields will be null
632651
// in either case, the zip64 extra field info will be incorporated into other fields
@@ -718,14 +737,16 @@ public static bool TryReadBlock(ReadOnlySpan<byte> buffer, Stream furtherReads,
718737
zip64 = new();
719738
if (saveExtraFieldsAndComments)
720739
{
721-
header.ExtraFields = ZipGenericExtraField.ParseExtraField(zipExtraFields);
740+
header.ExtraFields = ZipGenericExtraField.ParseExtraField(zipExtraFields, out ReadOnlySpan<byte> trailingDataSpan);
722741
zip64 = Zip64ExtraField.GetAndRemoveZip64Block(header.ExtraFields,
723742
uncompressedSizeInZip64, compressedSizeInZip64,
724743
relativeOffsetInZip64, diskNumberStartInZip64);
744+
header.TrailingExtraFieldData = trailingDataSpan.ToArray();
725745
}
726746
else
727747
{
728748
header.ExtraFields = null;
749+
header.TrailingExtraFieldData = null;
729750
zip64 = Zip64ExtraField.GetJustZip64Block(zipExtraFields,
730751
uncompressedSizeInZip64, compressedSizeInZip64,
731752
relativeOffsetInZip64, diskNumberStartInZip64);

0 commit comments

Comments
 (0)