Skip to content

Remove StructBox for Value Types #549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 53 additions & 17 deletions src/fsharp/FSharp.Core/array.fs
Original file line number Diff line number Diff line change
Expand Up @@ -172,24 +172,39 @@ namespace Microsoft.FSharp.Collections

Microsoft.FSharp.Primitives.Basics.Array.subUnchecked 0 count array

[<CompiledName("CountBy")>]
let countBy projection (array:'T[]) =
checkNonNull "array" array
let dict = new Dictionary<Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>,int>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer)
let inline countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (array:'T[]) =
let dict = Dictionary comparer

// Build the groupings
for v in array do
let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection v)
let key = projection v
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For these guys I would consider naming the value safeKey, to match the given type name. As it stands, the value key does not have type 'Key, nor could it be returned by getKey...

Similarly throughout.

let mutable prev = Unchecked.defaultof<_>
if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1

let res = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count
let mutable i = 0
for group in dict do
res.[i] <- group.Key.Value, group.Value
res.[i] <- getKey group.Key, group.Value
i <- i + 1
res

// Counting path that uses the projected keys as-is (getKey is id) with the structural comparer.
// No StructBox wrapper is used here: under the 64-bit JIT the wrapper leads to "hard" tailcalls
// which affect performance.
let countByValueType (projection:'T->'Key) (array:'T[]) =
    let comparer = HashIdentity.Structural<'Key>
    countByImpl comparer projection id array

// Counting path that wraps every projected key in a StructBox, in case the key type is itself
// a type using null as a representation. Keys are unwrapped again when the results are built.
let countByRefType (projection:'T->'Key) (array:'T[]) =
    let comparer = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer
    countByImpl
        comparer
        (fun item -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection item))
        (fun boxedKey -> boxedKey.Value)
        array

// Public entry point: checks the input array for null, then picks the counting path.
// When FX_ATLEAST_40 is defined, value-type keys take the unwrapped path and all other
// keys take the StructBox path; otherwise the StructBox path is always used.
[<CompiledName("CountBy")>]
let countBy (projection:'T->'Key) (array:'T[]) =
    checkNonNull "array" array
#if FX_ATLEAST_40
    if typeof<'Key>.IsValueType then
        countByValueType projection array
    else
        countByRefType projection array
#else
    countByRefType projection array
#endif

[<CompiledName("Append")>]
let append (array1:'T[]) (array2:'T[]) =
checkNonNull "array1" array1
Expand Down Expand Up @@ -408,32 +423,53 @@ namespace Microsoft.FSharp.Collections
let rec loop i = i >= len1 || (f.Invoke(array1.[i], array2.[i]) && loop (i+1))
loop 0

[<CompiledName("GroupBy")>]
let groupBy keyf (array: 'T[]) =
checkNonNull "array" array
let dict = new Dictionary<RuntimeHelpers.StructBox<'Key>,ResizeArray<'T>>(RuntimeHelpers.StructBox<'Key>.Comparer)
let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) =
let dict = Dictionary<_,ResizeArray<_>> comparer

// Previously this was 1, but I think this is rather stingy, considering that we are already paying
// for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the
// Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means
// that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway.
let initialBucketSize = 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

List<'t> defaults to 0 capacity, then grows to size 4 on the first Add. The initial length-0 backing store is a shared static guy, so no allocation is needed. Thus using explicit initial capacity of 4 results in exactly the same allocation profile as using the default constructor 😄

Using 1 as the default capacity potentially pays off if most buckets wind up with only 1 element, but as you mention it's worse if it has to grow a couple times. Without some serious usage pattern research I don't think there's much point in guessing about something like that.

So my recommendation would be to either leave it as-is, or use default ctor ResizeArray()


// Build the groupings
for i = 0 to (array.Length - 1) do
let v = array.[i]
let key = RuntimeHelpers.StructBox (keyf v)
let ok, prev = dict.TryGetValue(key)
if ok then
prev.Add(v)
let key = keyf v
let mutable prev = Unchecked.defaultof<_>
if dict.TryGetValue(key, &prev) then
prev.Add v
else
let prev = new ResizeArray<'T>(1)
let prev = ResizeArray initialBucketSize
dict.[key] <- prev
prev.Add(v)
prev.Add v

// Return the array-of-arrays.
let result = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count
let mutable i = 0
for group in dict do
result.[i] <- group.Key.Value, group.Value.ToArray()
result.[i] <- getKey group.Key, group.Value.ToArray()
i <- i + 1

result

// Grouping path that uses the computed keys as-is (getKey is id) with the structural comparer.
// No StructBox wrapper is used here: under the 64-bit JIT the wrapper leads to "hard" tailcalls
// which affect performance.
let groupByValueType (keyf:'T->'Key) (array:'T[]) =
    let comparer = HashIdentity.Structural<'Key>
    groupByImpl comparer keyf id array

// Grouping path that wraps every computed key in a StructBox, in case the key type is itself
// a type using null as a representation. Keys are unwrapped again when the results are built.
let groupByRefType (keyf:'T->'Key) (array:'T[]) =
    let comparer = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer
    groupByImpl
        comparer
        (fun item -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf item))
        (fun boxedKey -> boxedKey.Value)
        array

// Public entry point: checks the input array for null, then picks the grouping path.
// When FX_ATLEAST_40 is defined, value-type keys take the unwrapped path and all other
// keys take the StructBox path; otherwise the StructBox path is always used.
[<CompiledName("GroupBy")>]
let groupBy (keyf:'T->'Key) (array:'T[]) =
    checkNonNull "array" array
#if FX_ATLEAST_40
    if typeof<'Key>.IsValueType then
        groupByValueType keyf array
    else
        groupByRefType keyf array
#else
    groupByRefType keyf array
#endif

[<CompiledName("Pick")>]
let pick f (array: _[]) =
checkNonNull "array" array
Expand Down
46 changes: 29 additions & 17 deletions src/fsharp/FSharp.Core/fslib-extra-pervasives.fs
Original file line number Diff line number Diff line change
Expand Up @@ -30,65 +30,77 @@ module ExtraTopLevelOperators =
// Builds a set from the given sequence by delegating to Set.ofSeq.
[<CompiledName("CreateSet")>]
let set l =
    l |> Collections.Set.ofSeq

[<CompiledName("CreateDictionary")>]
let dict l =
// Use a dictionary (this requires hashing and equality on the key type)
// Wrap keys in a StructBox in case they are null (when System.Collections.Generic.Dictionary fails).
let t = new Dictionary<RuntimeHelpers.StructBox<'Key>,_>(RuntimeHelpers.StructBox<'Key>.Comparer)
let inline dictImpl (comparer:IEqualityComparer<'SafeKey>) (makeSafeKey:'Key->'SafeKey) (getKey:'SafeKey->'Key) (l:seq<'Key*'T>) =
let t = Dictionary comparer
for (k,v) in l do
t.[RuntimeHelpers.StructBox(k)] <- v
t.[makeSafeKey k] <- v
let d = (t :> IDictionary<_,_>)
let c = (t :> ICollection<_>)
// Give a read-only view of the dictionary
{ new IDictionary<'Key, 'T> with
member s.Item
with get x = d.[RuntimeHelpers.StructBox(x)]
with get x = d.[makeSafeKey x]
and set x v = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)))
member s.Keys =
let keys = d.Keys
{ new ICollection<'Key> with
member s.Add(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)));
member s.Clear() = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)));
member s.Remove(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)));
member s.Contains(x) = keys.Contains(RuntimeHelpers.StructBox(x))
member s.Contains(x) = keys.Contains(makeSafeKey x)
member s.CopyTo(arr,i) =
let mutable n = 0
for k in keys do
arr.[i+n] <- k.Value
arr.[i+n] <- getKey k
n <- n + 1
member s.IsReadOnly = true
member s.Count = keys.Count
interface IEnumerable<'Key> with
member s.GetEnumerator() = (keys |> Seq.map (fun v -> v.Value)).GetEnumerator()
member s.GetEnumerator() = (keys |> Seq.map getKey).GetEnumerator()
interface System.Collections.IEnumerable with
member s.GetEnumerator() = ((keys |> Seq.map (fun v -> v.Value)) :> System.Collections.IEnumerable).GetEnumerator() }
member s.GetEnumerator() = ((keys |> Seq.map getKey) :> System.Collections.IEnumerable).GetEnumerator() }

member s.Values = d.Values
member s.Add(k,v) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)))
member s.ContainsKey(k) = d.ContainsKey(RuntimeHelpers.StructBox(k))
member s.ContainsKey(k) = d.ContainsKey(makeSafeKey k)
member s.TryGetValue(k,r) =
let key = RuntimeHelpers.StructBox(k)
let key = makeSafeKey k
if d.ContainsKey(key) then (r <- d.[key]; true) else false
member s.Remove(k : 'Key) = (raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated))) : bool)
interface ICollection<KeyValuePair<'Key, 'T>> with
member s.Add(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)));
member s.Clear() = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)));
member s.Remove(x) = raise (NotSupportedException(SR.GetString(SR.thisValueCannotBeMutated)));
member s.Contains(KeyValue(k,v)) = c.Contains(KeyValuePair<_,_>(RuntimeHelpers.StructBox(k),v))
member s.Contains(KeyValue(k,v)) = c.Contains(KeyValuePair<_,_>(makeSafeKey k,v))
member s.CopyTo(arr,i) =
let mutable n = 0
for (KeyValue(k,v)) in c do
arr.[i+n] <- KeyValuePair<_,_>(k.Value,v)
arr.[i+n] <- KeyValuePair<_,_>(getKey k,v)
n <- n + 1
member s.IsReadOnly = true
member s.Count = c.Count
interface IEnumerable<KeyValuePair<'Key, 'T>> with
member s.GetEnumerator() =
(c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(k.Value,v))).GetEnumerator()
(c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(getKey k,v))).GetEnumerator()
interface System.Collections.IEnumerable with
member s.GetEnumerator() =
((c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(k.Value,v))) :> System.Collections.IEnumerable).GetEnumerator() }
((c |> Seq.map (fun (KeyValue(k,v)) -> KeyValuePair<_,_>(getKey k,v))) :> System.Collections.IEnumerable).GetEnumerator() }

// Dictionary path that stores keys as-is (both key conversions are id) with the structural comparer.
// No StructBox wrapper is used here: under the 64-bit JIT the wrapper leads to "hard" tailcalls
// which affect performance.
let dictValueType (l:seq<'Key*'T>) =
    let comparer = HashIdentity.Structural<'Key>
    dictImpl comparer id id l

// Dictionary path that wraps every key in a StructBox, in case the key type is itself
// a type using null as a representation. Keys are unwrapped again whenever they are handed back.
let dictRefType (l:seq<'Key*'T>) =
    let comparer = RuntimeHelpers.StructBox<'Key>.Comparer
    dictImpl
        comparer
        (fun key -> RuntimeHelpers.StructBox key)
        (fun boxedKey -> boxedKey.Value)
        l

// Public entry point: picks the dictionary implementation for a sequence of key/value pairs.
// When FX_ATLEAST_40 is defined, value-type keys take the unwrapped path and all other
// keys take the StructBox path; otherwise the StructBox path is always used.
[<CompiledName("CreateDictionary")>]
let dict (l:seq<'Key*'T>) =
#if FX_ATLEAST_40
    if typeof<'Key>.IsValueType then
        dictValueType l
    else
        dictRefType l
#else
    dictRefType l
#endif

let getArray (vals : seq<'T>) =
match vals with
Expand Down
66 changes: 51 additions & 15 deletions src/fsharp/FSharp.Core/list.fs
Original file line number Diff line number Diff line change
Expand Up @@ -41,23 +41,38 @@ namespace Microsoft.FSharp.Collections
// Thin wrapper that forwards to the internal List.concat primitive.
[<CompiledName("Concat")>]
let concat lists =
    lists |> Microsoft.FSharp.Primitives.Basics.List.concat

[<CompiledName("CountBy")>]
let countBy projection (list:'T list) =
let dict = new Dictionary<Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>,int>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer)
let inline countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (list:'T list) =
let dict = Dictionary comparer
let rec loop srcList =
match srcList with
| [] -> ()
| h::t ->
let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection h)
let key = projection h
let mutable prev = 0
if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1
loop t
loop list
let mutable result = []
for group in dict do
result <- (group.Key.Value, group.Value) :: result
result <- (getKey group.Key, group.Value) :: result
result |> rev

// Counting path that uses the projected keys as-is (getKey is id) with the structural comparer.
// No StructBox wrapper is used here: under the 64-bit JIT the wrapper leads to "hard" tailcalls
// which affect performance.
let countByValueType (projection:'T->'Key) (list:'T list) =
    let comparer = HashIdentity.Structural<'Key>
    countByImpl comparer projection id list

// Counting path that wraps every projected key in a StructBox, in case the key type is itself
// a type using null as a representation. Keys are unwrapped again when the results are built.
let countByRefType (projection:'T->'Key) (list:'T list) =
    let comparer = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer
    countByImpl
        comparer
        (fun item -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection item))
        (fun boxedKey -> boxedKey.Value)
        list

// Public entry point: picks the counting path for the list.
// When FX_ATLEAST_40 is defined, value-type keys take the unwrapped path and all other
// keys take the StructBox path; otherwise the StructBox path is always used.
[<CompiledName("CountBy")>]
let countBy (projection:'T->'Key) (list:'T list) =
#if FX_ATLEAST_40
    if typeof<'Key>.IsValueType then
        countByValueType projection list
    else
        countByRefType projection list
#else
    countByRefType projection list
#endif

// Thin wrapper that forwards to the internal List.map primitive.
[<CompiledName("Map")>]
let map f list =
    list |> Microsoft.FSharp.Primitives.Basics.List.map f

Expand Down Expand Up @@ -434,31 +449,52 @@ namespace Microsoft.FSharp.Collections
// Thin wrapper that forwards to the internal List.filter primitive.
[<CompiledName("Where")>]
let where f x =
    x |> Microsoft.FSharp.Primitives.Basics.List.filter f

[<CompiledName("GroupBy")>]
let groupBy keyf (list: 'T list) =
let dict = new Dictionary<Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>,ResizeArray<'T>>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer)
let inline groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (list: 'T list) =
let dict = Dictionary<_,ResizeArray<_>> comparer

// Previously this was 1, but I think this is rather stingy, considering that we are already paying
// for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the
// Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means
// that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway.
let initialBucketSize = 4

// Build the groupings
let rec loop list =
match list with
| v :: t ->
let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf v)
let ok,prev = dict.TryGetValue(key)
if ok then
prev.Add(v)
let key = keyf v
let mutable prev = Unchecked.defaultof<_>
if dict.TryGetValue(key, &prev) then
prev.Add v
else
let prev = new ResizeArray<'T>(1)
let prev = ResizeArray initialBucketSize
dict.[key] <- prev
prev.Add(v)
prev.Add v
loop t
| _ -> ()
loop list

// Return the list-of-lists.
dict
|> Seq.map (fun group -> (group.Key.Value, Seq.toList group.Value))
|> Seq.map (fun group -> (getKey group.Key, Seq.toList group.Value))
|> Seq.toList

// Grouping path that uses the computed keys as-is (getKey is id) with the structural comparer.
// No StructBox wrapper is used here: under the 64-bit JIT the wrapper leads to "hard" tailcalls
// which affect performance.
let groupByValueType (keyf:'T->'Key) (list:'T list) =
    let comparer = HashIdentity.Structural<'Key>
    groupByImpl comparer keyf id list

// Grouping path that wraps every computed key in a StructBox, in case the key type is itself
// a type using null as a representation. Keys are unwrapped again when the results are built.
let groupByRefType (keyf:'T->'Key) (list:'T list) =
    let comparer = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer
    groupByImpl
        comparer
        (fun item -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf item))
        (fun boxedKey -> boxedKey.Value)
        list

// Public entry point: picks the grouping path for the list.
// When FX_ATLEAST_40 is defined, value-type keys take the unwrapped path and all other
// keys take the StructBox path; otherwise the StructBox path is always used.
[<CompiledName("GroupBy")>]
let groupBy (keyf:'T->'Key) (list:'T list) =
#if FX_ATLEAST_40
    if typeof<'Key>.IsValueType then
        groupByValueType keyf list
    else
        groupByRefType keyf list
#else
    groupByRefType keyf list
#endif

// Thin wrapper that forwards to the internal List.partition primitive.
[<CompiledName("Partition")>]
let partition p x =
    x |> Microsoft.FSharp.Primitives.Basics.List.partition p

Expand Down
Loading