Skip to content

use explicit simd for iszero check on partials #559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/ForwardDiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ if VERSION >= v"1.6"
end
using Random
using LinearAlgebra
import SIMD: Vec

import Printf
import NaNMath
Expand Down
23 changes: 15 additions & 8 deletions src/partials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ end
@inline _mul_partials(a::Partials{0,A}, b::Partials{N,B}, afactor, bfactor) where {N,A,B} = bfactor * b
@inline _mul_partials(a::Partials{N,A}, b::Partials{0,B}, afactor, bfactor) where {N,A,B} = afactor * a

const SIMDFloat = Union{Float64, Float32}
const SIMDInt = Union{
Int128, Int64, Int32, Int16, Int8,
UInt128, UInt64, UInt32, UInt16, UInt8,
}
const SIMDType = Union{SIMDFloat, SIMDInt}

##################################
# Generated Functions on NTuples #
##################################
Expand All @@ -164,6 +171,7 @@ end
@inline rand_tuple(::AbstractRNG, ::Type{Tuple{}}) = tuple()
@inline rand_tuple(::Type{Tuple{}}) = tuple()

iszero_tuple(tup::NTuple{N,V}) where {N, V<:SIMDType} = sum(Vec(tup) != zero(V)) == 0
@generated function iszero_tuple(tup::NTuple{N,V}) where {N,V}
ex = Expr(:&&, [:(z == tup[$i]) for i=1:N]...)
return quote
Expand Down Expand Up @@ -205,15 +213,14 @@ const SIMDInt = Union{
}
const SIMDType = Union{SIMDFloat, SIMDInt}
const NT{N,T} = NTuple{N,T}
using SIMD

# SIMD implementation
add_tuples(a::NT{N,T}, b::NT{N,T}) where {N, T<:SIMDType} = Tuple(Vec(a) + Vec(b))
sub_tuples(a::NT{N,T}, b::NT{N,T}) where {N, T<:SIMDType} = Tuple(Vec(a) - Vec(b))
scale_tuple(tup::NT{N,T}, x::T) where {N, T<:SIMDType} = Tuple(Vec(tup) * x)
div_tuple_by_scalar(tup::NT{N,T}, x::T) where {N, T<:SIMDFloat} = Tuple(Vec(tup) / x)
minus_tuple(tup::NT{N,T}) where {N, T<:SIMDType} = Tuple(-Vec(tup))
mul_tuples(a::NT{N,T}, b::NT{N,T}, af::T, bf::T) where {N, T<:SIMDType} = Tuple(muladd(Vec{N,T}(af), Vec(a), Vec{N,T}(bf) * Vec(b)))
@inline add_tuples(a::NT{N,T}, b::NT{N,T}) where {N, T<:SIMDType} = Tuple(Vec(a) + Vec(b))
@inline sub_tuples(a::NT{N,T}, b::NT{N,T}) where {N, T<:SIMDType} = Tuple(Vec(a) - Vec(b))
@inline scale_tuple(tup::NT{N,T}, x::T) where {N, T<:SIMDType} = Tuple(Vec(tup) * x)
@inline div_tuple_by_scalar(tup::NT{N,T}, x::T) where {N, T<:SIMDFloat} = Tuple(Vec(tup) / x)
@inline minus_tuple(tup::NT{N,T}) where {N, T<:SIMDType} = Tuple(-Vec(tup))
@inline mul_tuples(a::NT{N,T}, b::NT{N,T}, af::T, bf::T) where {N, T<:SIMDType} = Tuple(muladd(af, Vec(a), bf * Vec(b)))


# Fallback implementations
Expand All @@ -222,7 +229,7 @@ mul_tuples(a::NT{N,T}, b::NT{N,T}, af::T, bf::T) where {N, T<:SIMDType} = Tuple
@generated scale_tuple(tup::NT{N}, x) where N = tupexpr(i -> :(tup[$i] * x), N)
@generated div_tuple_by_scalar(tup::NT{N}, x) where N = tupexpr(i -> :(tup[$i] / x), N)
@generated minus_tuple(tup::NT{N}) where N = tupexpr(i -> :(-tup[$i]), N)
@generated mul_tuples(a::NT{N}, b::NT{N}, af, bf) where N = tupexpr(i -> :((af * a[$i]) + (bf * b[$i])), N)
@generated mul_tuples(a::NT{N}, b::NT{N}, af, bf) where N = tupexpr(i -> :(muladd(af, a[$i], bf * b[$i])), N)

###################
# Pretty Printing #
Expand Down