Skip to content

Commit 201e080

Browse files
committed
Map only slices of idx files into memory
This should help avoid OOMs thrown when operating on repos with very large git database files.
1 parent d146204 commit 201e080

1 file changed

Lines changed: 51 additions & 18 deletions

File tree

src/NerdBank.GitVersioning/ManagedGit/GitPackIndexMappedReader.cs

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright (c) .NET Foundation and Contributors. All rights reserved.
22
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
33

4+
#nullable enable
5+
46
using System.Buffers.Binary;
57
using System.Diagnostics;
68
using System.IO.MemoryMappedFiles;
@@ -14,16 +16,18 @@ namespace Nerdbank.GitVersioning.ManagedGit;
1416
public unsafe class GitPackIndexMappedReader : GitPackIndexReader
1517
{
1618
private readonly MemoryMappedFile file;
17-
private readonly MemoryMappedViewAccessor accessor;
1819

1920
// The fanout table consists of
2021
// 256 4-byte network byte order integers.
2122
// The N-th entry of this table records the number of objects in the corresponding pack,
2223
// the first byte of whose object name is less than or equal to N.
2324
private readonly int[] fanoutTable = new int[257];
25+
private readonly ulong fileLength;
2426

25-
private readonly byte* ptr;
2627
private bool initialized;
28+
private MemoryMappedViewAccessor? accessor;
29+
private ulong accessorOffset;
30+
private ulong accessorSize;
2731

2832
/// <summary>
2933
/// Initializes a new instance of the <see cref="GitPackIndexMappedReader"/> class.
@@ -38,17 +42,8 @@ public GitPackIndexMappedReader(FileStream stream)
3842
throw new ArgumentNullException(nameof(stream));
3943
}
4044

45+
this.fileLength = (ulong)stream.Length;
4146
this.file = MemoryMappedFile.CreateFromFile(stream, mapName: null, capacity: 0, MemoryMappedFileAccess.Read, HandleInheritability.None, leaveOpen: false);
42-
this.accessor = this.file.CreateViewAccessor(0, 0, MemoryMappedFileAccess.Read);
43-
this.accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref this.ptr);
44-
}
45-
46-
private ReadOnlySpan<byte> Value
47-
{
48-
get
49-
{
50-
return new ReadOnlySpan<byte>(this.ptr, (int)this.accessor.Capacity);
51-
}
5247
}
5348

5449
/// <inheritdoc/>
@@ -69,7 +64,7 @@ public override (long? Offset, GitObjectId? ObjectId) GetOffset(Span<byte> objec
6964
int order = 0;
7065

7166
int tableSize = 20 * (packEnd - packStart + 1);
72-
ReadOnlySpan<byte> table = this.Value.Slice(4 + 4 + (256 * 4) + (20 * packStart), tableSize);
67+
ReadOnlySpan<byte> table = this.GetSpan((ulong)(4 + 4 + (256 * 4) + (20 * packStart)), tableSize);
7368

7469
int originalPackStart = packStart;
7570

@@ -117,7 +112,7 @@ public override (long? Offset, GitObjectId? ObjectId) GetOffset(Span<byte> objec
117112
// Get the offset value. It's located at:
118113
// 4 (header) + 4 (version) + 256 * 4 (fanout table) + 20 * objectCount (SHA1 object name table) + 4 * objectCount (CRC32) + 4 * i (offset values)
119114
int offsetTableStart = 4 + 4 + (256 * 4) + (20 * objectCount) + (4 * objectCount);
120-
ReadOnlySpan<byte> offsetBuffer = this.Value.Slice(offsetTableStart + (4 * (i + originalPackStart)), 4);
115+
ReadOnlySpan<byte> offsetBuffer = this.GetSpan((ulong)(offsetTableStart + (4 * (i + originalPackStart))), 4);
121116
uint offset = BinaryPrimitives.ReadUInt32BigEndian(offsetBuffer);
122117

123118
if (offsetBuffer[0] < 128)
@@ -130,7 +125,7 @@ public override (long? Offset, GitObjectId? ObjectId) GetOffset(Span<byte> objec
130125
// which follows the table of 4-byte offset entries: "large offsets are encoded as an index into the next table with the msbit set."
131126
offset = offset & 0x7FFFFFFF;
132127

133-
offsetBuffer = this.Value.Slice(offsetTableStart + (4 * objectCount) + (8 * (int)offset), 8);
128+
offsetBuffer = this.GetSpan((ulong)(offsetTableStart + (4 * objectCount) + (8 * (int)offset)), 8);
134129
long offset64 = BinaryPrimitives.ReadInt64BigEndian(offsetBuffer);
135130
return (offset64, GitObjectId.Parse(table.Slice(20 * i, 20)));
136131
}
@@ -139,22 +134,60 @@ public override (long? Offset, GitObjectId? ObjectId) GetOffset(Span<byte> objec
139134
/// <inheritdoc/>
140135
public override void Dispose()
{
    // Tear down the currently mapped window (if one exists) before the file mapping it was created from.
    if (this.accessor is not null)
    {
        this.accessor.Dispose();
        this.accessor = null;
    }

    this.file.Dispose();
}
145141

142+
/// <summary>
/// Gets a read-only span over a range of the index file, mapping a new window of the file into memory
/// when the requested range is not covered by the window that is currently mapped.
/// </summary>
/// <param name="offset">The offset, in bytes from the start of the file, of the first byte to expose.</param>
/// <param name="length">The number of bytes to expose.</param>
/// <returns>A span of <paramref name="length"/> bytes of mapped memory that begins at file offset <paramref name="offset"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="length"/> is negative or the requested range extends beyond the end of the file.
/// </exception>
/// <remarks>
/// Only one view accessor is tracked at a time; a request outside the current window disposes that accessor
/// and creates another one.
/// </remarks>
private unsafe ReadOnlySpan<byte> GetSpan(ulong offset, int length)
{
    checked
    {
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length));
        }

        // Reject ranges that run past the end of the file (for example when reading a truncated or corrupt index).
        // Without this check the window below is clamped to the file length and the returned span would
        // extend beyond the memory that was actually mapped.
        ulong end = offset + (ulong)length;
        if (end > this.fileLength)
        {
            throw new ArgumentOutOfRangeException(nameof(offset), "The requested range extends beyond the end of the file.");
        }

        // If the request is for a window that we have not currently mapped, throw away what we have.
        if (this.accessor is not null && (this.accessorOffset > offset || this.accessorOffset + this.accessorSize < end))
        {
            this.accessor.Dispose();
            this.accessor = null;
        }

        if (this.accessor is null)
        {
            const int minimumLength = 10 * 1024 * 1024;
            uint windowSize = (uint)Math.Min((ulong)Math.Max(minimumLength, length), this.fileLength);

            // Push window 'to the left' if our preferred minimum size doesn't fit when we start at the offset requested.
            ulong actualOffset = offset + windowSize > this.fileLength ? this.fileLength - windowSize : offset;

            this.accessor = this.file.CreateViewAccessor((long)actualOffset, windowSize, MemoryMappedFileAccess.Read);

            // Record the *actual* offset into the file that the pointer to native memory points at.
            // This may be earlier in the file than we requested, and if so, go ahead and take advantage of that.
            this.accessorOffset = actualOffset - (ulong)this.accessor.PointerOffset;

            // Also record the *actual* length of the mapped memory, again so we can take full advantage before reallocating the view.
            this.accessorSize = this.accessor.SafeMemoryMappedViewHandle.ByteLength;
        }

        // The requested offset must not precede the file offset that the start of the mapped memory corresponds to.
        Debug.Assert(offset >= this.accessorOffset);

        // NOTE(review): AcquirePointer is not paired with ReleasePointer anywhere in this method. The SafeBuffer
        // documentation asks for the two to be paired; presumably the outstanding reference is what keeps
        // previously returned spans usable after the accessor is replaced -- confirm before changing this.
        byte* ptr = null;
        this.accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref ptr);
        return new ReadOnlySpan<byte>(ptr + offset - this.accessorOffset, length);
    }
}
177+
146178
private void Initialize()
147179
{
148180
if (!this.initialized)
149181
{
150-
ReadOnlySpan<byte> value = this.Value;
182+
const int fanoutTableLength = 256;
183+
ReadOnlySpan<byte> value = this.GetSpan(0, 4 + (4 * fanoutTableLength) + 4);
151184

152185
ReadOnlySpan<byte> header = value.Slice(0, 4);
153186
int version = BinaryPrimitives.ReadInt32BigEndian(value.Slice(4, 4));
154187
Debug.Assert(header.SequenceEqual(Header));
155188
Debug.Assert(version == 2);
156189

157-
for (int i = 1; i <= 256; i++)
190+
for (int i = 1; i <= fanoutTableLength; i++)
158191
{
159192
this.fanoutTable[i] = BinaryPrimitives.ReadInt32BigEndian(value.Slice(4 + (4 * i), 4));
160193
}

0 commit comments

Comments
 (0)