Skip to content

Commit d4b6861

Browse files
author
Paul Westcott
committed
Split Array.countBy/groupBy by ValueType/RefType
1 parent 02e6d42 commit d4b6861

File tree

2 files changed

+55
-19
lines changed

2 files changed

+55
-19
lines changed

src/fsharp/FSharp.Core/array.fs

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -172,24 +172,39 @@ namespace Microsoft.FSharp.Collections
172172

173173
Microsoft.FSharp.Primitives.Basics.Array.subUnchecked 0 count array
174174

175-
[<CompiledName("CountBy")>]
176-
let countBy projection (array:'T[]) =
177-
checkNonNull "array" array
178-
let dict = new Dictionary<Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>,int>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer)
175+
let countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (array:'T[]) =
176+
let dict = Dictionary comparer
179177

180178
// Build the groupings
181179
for v in array do
182-
let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection v)
180+
let key = projection v
183181
let mutable prev = Unchecked.defaultof<_>
184182
if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1
185183

186184
let res = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count
187185
let mutable i = 0
188186
for group in dict do
189-
res.[i] <- group.Key.Value, group.Value
187+
res.[i] <- getKey group.Key, group.Value
190188
i <- i + 1
191189
res
192190

191+
// We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance
192+
let countByValueType (projection:'T->'Key) (array:'T[]) = countByImpl HashIdentity.Structural<'Key> projection id array
193+
194+
// Wrap a StructBox around all keys in case the key type is itself a type using null as a representation
195+
let countByRefType (projection:'T->'Key) (array:'T[]) = countByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection t)) (fun sb -> sb.Value) array
196+
197+
[<CompiledName("CountBy")>]
198+
let countBy (projection:'T->'Key) (array:'T[]) =
199+
checkNonNull "array" array
200+
#if FX_ATLEAST_40
201+
if typeof<'Key>.IsValueType
202+
then countByValueType projection array
203+
else countByRefType projection array
204+
#else
205+
countByRefType projection array
206+
#endif
207+
193208
[<CompiledName("Append")>]
194209
let append (array1:'T[]) (array2:'T[]) =
195210
checkNonNull "array1" array1
@@ -408,32 +423,53 @@ namespace Microsoft.FSharp.Collections
408423
let rec loop i = i >= len1 || (f.Invoke(array1.[i], array2.[i]) && loop (i+1))
409424
loop 0
410425

411-
[<CompiledName("GroupBy")>]
412-
let groupBy keyf (array: 'T[]) =
413-
checkNonNull "array" array
414-
let dict = new Dictionary<RuntimeHelpers.StructBox<'Key>,ResizeArray<'T>>(RuntimeHelpers.StructBox<'Key>.Comparer)
426+
let groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) =
427+
let dict = Dictionary<_,ResizeArray<_>> comparer
428+
429+
// Previously this was 1, but I think this is rather stingy, considering that we are alreadying paying
430+
// for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the
431+
// Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means
432+
// that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway.
433+
let initialBucketSize = 4
415434

416435
// Build the groupings
417436
for i = 0 to (array.Length - 1) do
418437
let v = array.[i]
419-
let key = RuntimeHelpers.StructBox (keyf v)
420-
let ok, prev = dict.TryGetValue(key)
421-
if ok then
422-
prev.Add(v)
438+
let key = keyf v
439+
let mutable prev = Unchecked.defaultof<_>
440+
if dict.TryGetValue(key, &prev) then
441+
prev.Add v
423442
else
424-
let prev = new ResizeArray<'T>(1)
443+
let prev = ResizeArray initialBucketSize
425444
dict.[key] <- prev
426-
prev.Add(v)
445+
prev.Add v
427446

428447
// Return the array-of-arrays.
429448
let result = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count
430449
let mutable i = 0
431450
for group in dict do
432-
result.[i] <- group.Key.Value, group.Value.ToArray()
451+
result.[i] <- getKey group.Key, group.Value.ToArray()
433452
i <- i + 1
434453

435454
result
436455

456+
// We avoid wrapping a StructBox, because under 64 JIT we get some "hard" tailcalls which affect performance
457+
let groupByValueType (keyf:'T->'Key) (array:'T[]) = groupByImpl HashIdentity.Structural<'Key> keyf id array
458+
459+
// Wrap a StructBox around all keys in case the key type is itself a type using null as a representation
460+
let groupByRefType (keyf:'T->'Key) (array:'T[]) = groupByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf t)) (fun sb -> sb.Value) array
461+
462+
[<CompiledName("GroupBy")>]
463+
let groupBy (keyf:'T->'Key) (array:'T[]) =
464+
checkNonNull "array" array
465+
#if FX_ATLEAST_40
466+
if typeof<'Key>.IsValueType
467+
then groupByValueType keyf array
468+
else groupByRefType keyf array
469+
#else
470+
groupByRefType keyf array
471+
#endif
472+
437473
[<CompiledName("Pick")>]
438474
let pick f (array: _[]) =
439475
checkNonNull "array" array

src/fsharp/FSharp.Core/list.fs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ namespace Microsoft.FSharp.Collections
7070
then countByValueType projection list
7171
else countByRefType projection list
7272
#else
73-
countByRefType projection source
73+
countByRefType projection list
7474
#endif
7575

7676
[<CompiledName("Map")>]
@@ -492,7 +492,7 @@ namespace Microsoft.FSharp.Collections
492492
then groupByValueType keyf list
493493
else groupByRefType keyf list
494494
#else
495-
groupByRefType keyf source
495+
groupByRefType keyf list
496496
#endif
497497

498498
[<CompiledName("Partition")>]

0 commit comments

Comments
 (0)