diff --git a/src/Microsoft.Data.Analysis/DataFrameBuffer.cs b/src/Microsoft.Data.Analysis/DataFrameBuffer.cs index 44a8d3faaa..352b853ddc 100644 --- a/src/Microsoft.Data.Analysis/DataFrameBuffer.cs +++ b/src/Microsoft.Data.Analysis/DataFrameBuffer.cs @@ -13,7 +13,7 @@ namespace Microsoft.Data.Analysis /// /// internal class DataFrameBuffer : ReadOnlyDataFrameBuffer - where T : struct + where T : unmanaged { private Memory _memory; diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs index 4020c6de7e..64b12c0607 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs @@ -8,7 +8,7 @@ namespace Microsoft.Data.Analysis { internal partial class PrimitiveColumnContainer - where T : struct + where T : unmanaged { public PrimitiveColumnContainer Add(PrimitiveColumnContainer right) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index df5a09c2a5..830440445e 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -17,7 +17,7 @@ namespace Microsoft.Data.Analysis /// /// internal partial class PrimitiveColumnContainer : IEnumerable - where T : struct + where T : unmanaged { public IList> Buffers = new List>(); @@ -90,6 +90,7 @@ public PrimitiveColumnContainer(ReadOnlyMemory buffer, ReadOnlyMemory(buffer, length); } Buffers.Add(dataBuffer); + int bitMapBufferLength = (length + 7) / 8; ReadOnlyDataFrameBuffer nullDataFrameBuffer; if (nullBitMap.IsEmpty) @@ -127,31 +128,7 @@ public PrimitiveColumnContainer(ReadOnlyMemory buffer, ReadOnlyMemory 0) - { - if (Buffers.Count == 0) - { - Buffers.Add(new DataFrameBuffer()); - NullBitMapBuffers.Add(new DataFrameBuffer()); - } - DataFrameBuffer lastBuffer = (DataFrameBuffer)Buffers[Buffers.Count - 1]; - if (lastBuffer.Length == ReadOnlyDataFrameBuffer.MaxCapacity) - { - lastBuffer = new DataFrameBuffer(); - Buffers.Add(lastBuffer); - NullBitMapBuffers.Add(new DataFrameBuffer()); - } - int allocatable = (int)Math.Min(length, ReadOnlyDataFrameBuffer.MaxCapacity); - lastBuffer.EnsureCapacity(allocatable); - DataFrameBuffer lastNullBitMapBuffer = (DataFrameBuffer)(NullBitMapBuffers[NullBitMapBuffers.Count - 1]); - int nullBufferAllocatable = (allocatable + 7) / 8; - lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); - lastBuffer.Length = allocatable; - lastNullBitMapBuffer.Length = nullBufferAllocatable; - length -= allocatable; - Length += lastBuffer.Length; - NullCount += lastBuffer.Length; - } + AppendMany(null, length); } public void Resize(long length) @@ -168,16 +145,14 @@ public void Append(T? value) Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - int bufferIndex = Buffers.Count - 1; - ReadOnlyDataFrameBuffer lastBuffer = Buffers[bufferIndex]; - if (lastBuffer.Length == ReadOnlyDataFrameBuffer.MaxCapacity) + + if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer.MaxCapacity) { - lastBuffer = new DataFrameBuffer(); - Buffers.Add(lastBuffer); + Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer mutableLastBuffer = DataFrameBuffer.GetMutableBuffer(lastBuffer); - Buffers[bufferIndex] = mutableLastBuffer; + + DataFrameBuffer mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1); mutableLastBuffer.Append(value ?? default); SetValidityBit(Length, value.HasValue); Length++; @@ -190,64 +165,68 @@ public void AppendMany(T? value, long count) NullCount += count; } - while (count > 0) + var remaining = count; + while (remaining > 0) { if (Buffers.Count == 0) { Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - int bufferIndex = Buffers.Count - 1; - ReadOnlyDataFrameBuffer lastBuffer = Buffers[bufferIndex]; - if (lastBuffer.Length == ReadOnlyDataFrameBuffer.MaxCapacity) + + if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer.MaxCapacity) { - lastBuffer = new DataFrameBuffer(); - Buffers.Add(lastBuffer); + Buffers.Add(new DataFrameBuffer()); NullBitMapBuffers.Add(new DataFrameBuffer()); } - DataFrameBuffer mutableLastBuffer = DataFrameBuffer.GetMutableBuffer(lastBuffer); - Buffers[bufferIndex] = mutableLastBuffer; - int allocatable = (int)Math.Min(count, ReadOnlyDataFrameBuffer.MaxCapacity); + + DataFrameBuffer mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1); + int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer.MaxCapacity); mutableLastBuffer.EnsureCapacity(allocatable); - mutableLastBuffer.RawSpan.Slice(lastBuffer.Length, allocatable).Fill(value ?? default); + + DataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(NullBitMapBuffers.Count - 1); + int nullBufferAllocatable = (allocatable + 7) / 8; + lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable); + + mutableLastBuffer.Length += allocatable; + lastNullBitMapBuffer.Length += nullBufferAllocatable; Length += allocatable; - int nullBitMapBufferIndex = NullBitMapBuffers.Count - 1; - ReadOnlyDataFrameBuffer lastNullBitMapBuffer = NullBitMapBuffers[nullBitMapBufferIndex]; - DataFrameBuffer mutableLastNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(lastNullBitMapBuffer); - NullBitMapBuffers[nullBitMapBufferIndex] = mutableLastNullBitMapBuffer; - int nullBitMapAllocatable = (int)(((uint)allocatable) / 8) + 1; - mutableLastNullBitMapBuffer.EnsureCapacity(nullBitMapAllocatable); - _modifyNullCountWhileIndexing = false; - for (long i = Length - count; i < Length; i++) + if (value.HasValue) { - SetValidityBit(i, value.HasValue ? true : false); + mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length - allocatable, allocatable).Fill(value ?? default); + + _modifyNullCountWhileIndexing = false; + for (long i = Length - allocatable; i < Length; i++) + { + SetValidityBit(i, value.HasValue); + } + _modifyNullCountWhileIndexing = true; } - _modifyNullCountWhileIndexing = true; - count -= allocatable; + + + remaining -= allocatable; } } public void ApplyElementwise(Func func) { + var bufferMaxCapacity = ReadOnlyDataFrameBuffer.MaxCapacity; for (int b = 0; b < Buffers.Count; b++) { - ReadOnlyDataFrameBuffer buffer = Buffers[b]; - long prevLength = checked(Buffers[0].Length * b); - DataFrameBuffer mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - Buffers[b] = mutableBuffer; - Span span = mutableBuffer.Span; - DataFrameBuffer mutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(NullBitMapBuffers[b]); - NullBitMapBuffers[b] = mutableNullBitMapBuffer; - Span nullBitMapSpan = mutableNullBitMapBuffer.Span; - for (int i = 0; i < span.Length; i++) + long prevLength = checked(bufferMaxCapacity * b); + + Span mutableBuffer = Buffers.GetOrCreateMutable(b).Span; + Span mutableNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(b).Span; + + for (int i = 0; i < mutableBuffer.Length; i++) { long curIndex = i + prevLength; - bool isValid = IsValid(nullBitMapSpan, i); - T? value = func(isValid ? span[i] : default(T?), curIndex); - span[i] = value.GetValueOrDefault(); - SetValidityBit(nullBitMapSpan, i, value != null); + bool isValid = IsValid(mutableNullBitMapBuffer, i); + T? value = func(isValid ? mutableBuffer[i] : null, curIndex); + mutableBuffer[i] = value.GetValueOrDefault(); + SetValidityBit(mutableNullBitMapBuffer, i, value != null); } } } @@ -255,25 +234,22 @@ public void ApplyElementwise(Func func) public void Apply(Func func, PrimitiveColumnContainer resultContainer) where TResult : unmanaged { + var bufferMaxCapacity = ReadOnlyDataFrameBuffer.MaxCapacity; for (int b = 0; b < Buffers.Count; b++) { - ReadOnlyDataFrameBuffer sourceBuffer = Buffers[b]; - ReadOnlySpan sourceNullBitMap = NullBitMapBuffers[b].ReadOnlySpan; + long prevLength = checked(bufferMaxCapacity * b); + var sourceBuffer = Buffers[b]; + var sourceNullBitMap = NullBitMapBuffers[b].ReadOnlySpan; - ReadOnlyDataFrameBuffer resultBuffer = resultContainer.Buffers[b]; - DataFrameBuffer resultMutableBuffer = DataFrameBuffer.GetMutableBuffer(resultBuffer); - resultContainer.Buffers[b] = resultMutableBuffer; - Span resultSpan = resultMutableBuffer.Span; - DataFrameBuffer resultMutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(resultContainer.NullBitMapBuffers[b]); - resultContainer.NullBitMapBuffers[b] = resultMutableNullBitMapBuffer; - Span resultNullBitMapSpan = resultMutableNullBitMapBuffer.Span; + Span mutableResultBuffer = resultContainer.Buffers.GetOrCreateMutable(b).Span; + Span mutableResultNullBitMapBuffers = resultContainer.NullBitMapBuffers.GetOrCreateMutable(b).Span; - for (int i = 0; i < Buffers[b].Length; i++) + for (int i = 0; i < sourceBuffer.Length; i++) { bool isValid = IsValid(sourceNullBitMap, i); - TResult? value = func(isValid ? sourceBuffer[i] : default(T?)); - resultSpan[i] = value.GetValueOrDefault(); - resultContainer.SetValidityBit(resultNullBitMapSpan, i, value != null); + TResult? value = func(isValid ? sourceBuffer[i] : null); + mutableResultBuffer[i] = value.GetValueOrDefault(); + resultContainer.SetValidityBit(mutableResultNullBitMapBuffers, i, value != null); } } } @@ -440,11 +416,10 @@ public T? this[long rowIndex] { int arrayIndex = GetArrayContainingRowIndex(rowIndex); rowIndex = rowIndex - arrayIndex * ReadOnlyDataFrameBuffer.MaxCapacity; - ReadOnlyDataFrameBuffer buffer = Buffers[arrayIndex]; - DataFrameBuffer mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); - Buffers[arrayIndex] = mutableBuffer; - DataFrameBuffer mutableNullBuffer = DataFrameBuffer.GetMutableBuffer(NullBitMapBuffers[arrayIndex]); - NullBitMapBuffers[arrayIndex] = mutableNullBuffer; + + Buffers.GetOrCreateMutable(arrayIndex); + NullBitMapBuffers.GetOrCreateMutable(arrayIndex); + if (value.HasValue) { Buffers[arrayIndex][(int)rowIndex] = value.Value; diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs new file mode 100644 index 0000000000..22ef32d979 --- /dev/null +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; + +namespace Microsoft.Data.Analysis +{ + internal static class PrimitiveColumnContainerHelpers + { + internal static DataFrameBuffer GetOrCreateMutable(this IList> bufferList, int index) + where T : unmanaged + { + ReadOnlyDataFrameBuffer sourceBuffer = bufferList[index]; + DataFrameBuffer mutableBuffer = sourceBuffer as DataFrameBuffer; + + if (mutableBuffer == null) + { + mutableBuffer = DataFrameBuffer.GetMutableBuffer(sourceBuffer); + bufferList[index] = mutableBuffer; + } + + + return mutableBuffer; + } + } +} diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs index f432805185..5498e9bae0 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs @@ -11,7 +11,7 @@ namespace Microsoft.Data.Analysis { internal interface IPrimitiveDataFrameColumnArithmetic - where T : struct + where T : unmanaged { void Add(PrimitiveColumnContainer left, PrimitiveColumnContainer right); void Add(PrimitiveColumnContainer column, T scalar); @@ -54,7 +54,7 @@ internal interface IPrimitiveDataFrameColumnArithmetic } internal static class PrimitiveDataFrameColumnArithmetic - where T : struct + where T : unmanaged { public static IPrimitiveDataFrameColumnArithmetic Instance { get; } = PrimitiveDataFrameColumnArithmetic.GetArithmetic(); } @@ -62,7 +62,7 @@ internal static class PrimitiveDataFrameColumnArithmetic internal static class PrimitiveDataFrameColumnArithmetic { public static IPrimitiveDataFrameColumnArithmetic GetArithmetic() - where T : struct + where T : unmanaged { if (typeof(T) == typeof(bool)) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs index a4c5d73b0d..4349caf4b7 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs @@ -12,7 +12,7 @@ namespace Microsoft.Data.Analysis { internal interface IPrimitiveColumnComputation - where T : struct + where T : unmanaged { void Abs(PrimitiveColumnContainer column); void All(PrimitiveColumnContainer column, out bool ret); @@ -37,7 +37,7 @@ internal interface IPrimitiveColumnComputation } internal static class PrimitiveColumnComputation - where T : struct + where T : unmanaged { public static IPrimitiveColumnComputation Instance { get; } = PrimitiveColumnComputation.GetComputation(); } @@ -45,7 +45,7 @@ internal static class PrimitiveColumnComputation internal static class PrimitiveColumnComputation { public static IPrimitiveColumnComputation GetComputation() - where T : struct + where T : unmanaged { if (typeof(T) == typeof(bool)) { diff --git a/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs b/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs index 59394eed8e..2bc41ebe51 100644 --- a/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs +++ b/src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs @@ -15,7 +15,7 @@ namespace Microsoft.Data.Analysis /// /// internal class ReadOnlyDataFrameBuffer - where T : struct + where T : unmanaged { private readonly ReadOnlyMemory _readOnlyBuffer;