From 61193e137d3f13ac4c73fadd8efd540114299712 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Sun, 15 Dec 2024 21:55:57 +0100 Subject: [PATCH 1/9] Add gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9b73c974..f70d3dc4 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ Manifest.toml # MacOS generated files *.DS_Store + +/.vscode/ From 285b64c2df84e316e2e71a462fb30049b9c3a1b6 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Mon, 16 Dec 2024 01:25:14 +0100 Subject: [PATCH 2/9] Introduce Abstract types for sparse arrays --- Project.toml | 2 + lib/GPUArraysCore/Manifest.toml | 14 ----- lib/GPUArraysCore/Project.toml | 2 + lib/GPUArraysCore/src/GPUArraysCore.jl | 32 ++++++++++- lib/JLArrays/Project.toml | 2 + lib/JLArrays/src/JLArrays.jl | 3 + lib/JLArrays/src/sparse.jl | 33 +++++++++++ src/GPUArrays.jl | 6 +- src/host/sparse.jl | 79 ++++++++++++++++++++++++++ 9 files changed, 156 insertions(+), 17 deletions(-) delete mode 100644 lib/GPUArraysCore/Manifest.toml create mode 100644 lib/JLArrays/src/sparse.jl create mode 100644 src/host/sparse.jl diff --git a/Project.toml b/Project.toml index 551c9edc..33213c5a 100644 --- a/Project.toml +++ b/Project.toml @@ -12,6 +12,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] @@ -24,5 +25,6 @@ Printf = "1" Random = "1" Reexport = "1" Serialization = "1" +SparseArrays = "1" Statistics = "1" julia = "1.10" diff --git a/lib/GPUArraysCore/Manifest.toml b/lib/GPUArraysCore/Manifest.toml deleted file mode 100644 index ca31d72b..00000000 --- a/lib/GPUArraysCore/Manifest.toml +++ /dev/null @@ -1,14 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[Adapt]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "af92965fb30777147966f58acb05da51c5616b5f" -uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "3.3.3" - -[[Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[LinearAlgebra]] -deps = ["Libdl"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/lib/GPUArraysCore/Project.toml b/lib/GPUArraysCore/Project.toml index 00ea6303..b47757ae 100644 --- a/lib/GPUArraysCore/Project.toml +++ b/lib/GPUArraysCore/Project.toml @@ -5,7 +5,9 @@ version = "0.2.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] Adapt = "4.0" +SparseArrays = "1" julia = "1.6" diff --git a/lib/GPUArraysCore/src/GPUArraysCore.jl b/lib/GPUArraysCore/src/GPUArraysCore.jl index bcf24e60..1df83012 100644 --- a/lib/GPUArraysCore/src/GPUArraysCore.jl +++ b/lib/GPUArraysCore/src/GPUArraysCore.jl @@ -1,7 +1,7 @@ module GPUArraysCore using Adapt - +using SparseArrays ## essential types @@ -9,6 +9,11 @@ export AbstractGPUArray, AbstractGPUVector, AbstractGPUMatrix, AbstractGPUVecOrM WrappedGPUArray, AnyGPUArray, AbstractGPUArrayStyle, AnyGPUArray, AnyGPUVector, AnyGPUMatrix +export AbstractGPUSparseArray, AbstractGPUSparseMatrix, AbstractGPUSparseVector, AbstractGPUSparseVecOrMat, + WrappedGPUSparseArray, AnyGPUSparseArray, AnyGPUSparseVector, AnyGPUSparseMatrix, + AbstractGPUSparseMatrixCSC, AbstractGPUSparseMatrixCSR, AbstractGPUSparseMatrixCOO, + WrappedGPUSparseMatrixCSC, AnyGPUSparseMatrixCSC, WrappedGPUSparseMatrixCSR, AnyGPUSparseMatrixCSR, 
WrappedGPUSparseMatrixCOO, AnyGPUSparseMatrixCOO + """ AbstractGPUArray{T, N} <: DenseArray{T, N} @@ -28,6 +33,31 @@ const AnyGPUArray{T,N} = Union{AbstractGPUArray{T,N}, WrappedGPUArray{T,N}} const AnyGPUVector{T} = AnyGPUArray{T, 1} const AnyGPUMatrix{T} = AnyGPUArray{T, 2} +## sparse arrays + +abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end + +const AbstractGPUSparseMatrix{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 2} +const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} +const AbstractGPUSparseVecOrMat{Tv, Ti} = Union{AbstractGPUSparseVector{Tv, Ti}, AbstractGPUSparseMatrix{Tv, Ti}} + +const WrappedGPUSparseArray{Tv, Ti, N} = WrappedArray{Tv,N,AbstractGPUSparseArray,AbstractGPUSparseArray{Tv, Ti, N}} +const AnyGPUSparseArray{Tv, Ti, N} = Union{AbstractGPUSparseArray{Tv, Ti, N}, WrappedGPUSparseArray{Tv, Ti, N}} +const AnyGPUSparseVector{Tv, Ti} = AnyGPUSparseArray{Tv, Ti, 1} +const AnyGPUSparseMatrix{Tv, Ti} = AnyGPUSparseArray{Tv, Ti, 2} + +abstract type AbstractGPUSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end +abstract type AbstractGPUSparseMatrixCSR{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end +abstract type AbstractGPUSparseMatrixCOO{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end + +const WrappedGPUSparseMatrixCSC{Tv,Ti} = WrappedArray{Tv,AbstractGPUSparseMatrixCSC,AbstractGPUSparseMatrixCSC{Tv,Ti}} +const AnyGPUSparseMatrixCSC{Tv,Ti} = Union{AbstractGPUSparseMatrixCSC{Tv,Ti}, WrappedGPUSparseMatrixCSC{Tv,Ti}} + +const WrappedGPUSparseMatrixCSR{Tv,Ti} = WrappedArray{Tv,AbstractGPUSparseMatrixCSR,AbstractGPUSparseMatrixCSR{Tv,Ti}} +const AnyGPUSparseMatrixCSR{Tv,Ti} = Union{AbstractGPUSparseMatrixCSR{Tv,Ti}, WrappedGPUSparseMatrixCSR{Tv,Ti}} + +const WrappedGPUSparseMatrixCOO{Tv,Ti} = WrappedArray{Tv,AbstractGPUSparseMatrixCOO,AbstractGPUSparseMatrixCOO{Tv,Ti}} +const AnyGPUSparseMatrixCOO{Tv,Ti} = Union{AbstractGPUSparseMatrixCOO{Tv,Ti}, WrappedGPUSparseMatrixCOO{Tv,Ti}} ## broadcasting diff --git a/lib/JLArrays/Project.toml b/lib/JLArrays/Project.toml index 55d9eb90..0c78e0af 100644 --- a/lib/JLArrays/Project.toml +++ b/lib/JLArrays/Project.toml @@ -8,10 +8,12 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] Adapt = "2.0, 3.0, 4.0" GPUArrays = "11.1" KernelAbstractions = "0.9" Random = "1" +SparseArrays = "1" julia = "1.8" diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index dbec3d25..a8666d2e 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -11,6 +11,7 @@ export JLArray, JLVector, JLMatrix, jl, JLBackend using GPUArrays using Adapt +using SparseArrays import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config @@ -387,4 +388,6 @@ Adapt.adapt_storage(::JLBackend, a::Array) = Adapt.adapt(JLArrays.JLArray, a) Adapt.adapt_storage(::JLBackend, a::JLArrays.JLArray) = a Adapt.adapt_storage(::KernelAbstractions.CPU, a::JLArrays.JLArray) = convert(Array, a) +include("sparse.jl") + end diff --git a/lib/JLArrays/src/sparse.jl b/lib/JLArrays/src/sparse.jl new file mode 100644 index 00000000..b49bae53 --- /dev/null +++ b/lib/JLArrays/src/sparse.jl @@ -0,0 +1,33 @@ +export JLSparseMatrixCSC + +struct 
JLSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractGPUSparseMatrixCSC{Tv,Ti} + m::Int # Number of rows + n::Int # Number of columns + colptr::JLVector{Ti} # Column i is in colptr[i]:(colptr[i+1]-1) + rowval::JLVector{Ti} # Row indices of stored values + nzval::JLVector{Tv} # Stored values, typically nonzeros + + function JLSparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::JLVector{Ti}, + rowval::JLVector{Ti}, nzval::JLVector{Tv}) where {Tv,Ti<:Integer} + SparseArrays.sparse_check_Ti(m, n, Ti) + GPUArrays._goodbuffers_csc(m, n, colptr, rowval, nzval) || + throw(ArgumentError("Invalid buffers for JLSparseMatrixCSC construction n=$n, colptr=$(summary(colptr)), rowval=$(summary(rowval)), nzval=$(summary(nzval))")) + new(Int(m), Int(n), colptr, rowval, nzval) + end +end +function JLSparseMatrixCSC(m::Integer, n::Integer, colptr::JLVector, rowval::JLVector, nzval::JLVector) + Tv = eltype(nzval) + Ti = promote_type(eltype(colptr), eltype(rowval)) + SparseArrays.sparse_check_Ti(m, n, Ti) + # SparseArrays.sparse_check(n, colptr, rowval, nzval) # TODO: this uses scalar indexing + # silently shorten rowval and nzval to usable index positions. + maxlen = abs(widemul(m, n)) + isbitstype(Ti) && (maxlen = min(maxlen, typemax(Ti) - 1)) + length(rowval) > maxlen && resize!(rowval, maxlen) + length(nzval) > maxlen && resize!(nzval, maxlen) + JLSparseMatrixCSC{Tv,Ti}(m, n, colptr, rowval, nzval) +end + +JLSparseMatrixCSC(A::SparseMatrixCSC) = JLSparseMatrixCSC(A.m, A.n, JLVector(A.colptr), JLVector(A.rowval), JLVector(A.nzval)) + +Base.copy(A::JLSparseMatrixCSC) = JLSparseMatrixCSC(A.m, A.n, copy(A.colptr), copy(A.rowval), copy(A.nzval)) diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index 418b87b5..d62d9bd6 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -4,6 +4,9 @@ using KernelAbstractions using Serialization using Random using LinearAlgebra +using SparseArrays +using SparseArrays: getcolptr, getrowval, getnzval + using Printf using LinearAlgebra.BLAS @@ -15,8 +18,6 @@ using LLVM.Interop using Reexport @reexport using GPUArraysCore -using KernelAbstractions - # device functionality include("device/abstractarray.jl") @@ -33,6 +34,7 @@ include("host/math.jl") include("host/random.jl") include("host/quirks.jl") include("host/uniformscaling.jl") +include("host/sparse.jl") include("host/statistics.jl") diff --git a/src/host/sparse.jl b/src/host/sparse.jl new file mode 100644 index 00000000..42a938c0 --- /dev/null +++ b/src/host/sparse.jl @@ -0,0 +1,79 @@ +## General Sparse Matrix + +Base.size(A::AbstractGPUSparseMatrix) = (A.m, A.n) + +SparseArrays.nonzeros(A::AbstractGPUSparseMatrix) = A.nzval +SparseArrays.getnzval(A::AbstractGPUSparseMatrix) = nonzeros(A) +SparseArrays.nnz(A::AbstractGPUSparseMatrix) = length(nzval(A)) + +function LinearAlgebra.rmul!(A::AbstractGPUSparseMatrix, x::Number) + rmul!(SparseArrays.getnzval(A), x) + return A +end + +function LinearAlgebra.lmul!(x::Number, A::AbstractGPUSparseMatrix) + lmul!(x, SparseArrays.getnzval(A)) + return A +end + +## CSC Matrix + +SparseArrays.getcolptr(A::AbstractGPUSparseMatrixCSC) = A.colptr +SparseArrays.rowvals(A::AbstractGPUSparseMatrixCSC) = A.rowval +SparseArrays.getrowval(A::AbstractGPUSparseMatrixCSC) = rowvals(A) +# SparseArrays.nzrange(A::AbstractGPUSparseMatrixCSC, col::Integer) = getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # TODO: this uses scalar indexing + +function _goodbuffers_csc(m, n, colptr, rowval, nzval) + return (length(colptr) == n + 1 && length(rowval) == length(nzval)) + # TODO: also add the condition that colptr[end] - 1 
== length(nzval) (allowscalar?) +end + +@inline function LinearAlgebra.mul!(C::AbstractGPUVector, A::AnyGPUSparseMatrixCSC, B::AbstractGPUVector, α::Number, β::Number) + return LinearAlgebra.generic_matvecmul!(C, LinearAlgebra.wrapper_char(A), LinearAlgebra._unwrap(A), B, LinearAlgebra.MulAddMul(α, β)) +end + +@inline function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVector, _add::LinearAlgebra.MulAddMul) + return SparseArrays.spdensemul!(C, tA, 'N', A, B, _add) +end + +Base.@constprop :aggressive function SparseArrays.spdensemul!(C::AbstractGPUVecOrMat, tA, tB, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVecOrMat, _add::LinearAlgebra.MulAddMul) + if tA == 'N' + return _spmatmul!(C, A, wrap(B, tB), _add.alpha, _add.beta) + else + throw(ArgumentError("tA different from 'N' not yet supported")) + end +end + +function _spmatmul!(C::AbstractGPUVecOrMat, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVecOrMat, α::Number, β::Number) + size(A, 2) == size(B, 1) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), does not match the first dimension of B, $(size(B,1))")) + size(A, 1) == size(C, 1) || + throw(DimensionMismatch("first dimension of A, $(size(A,1)), does not match the first dimension of C, $(size(C,1))")) + size(B, 2) == size(C, 2) || + throw(DimensionMismatch("second dimension of B, $(size(B,2)), does not match the second dimension of C, $(size(C,2))")) + + A_colptr = getcolptr(A) + A_rowval = rowvals(A) + A_nzval = getnzval(A) + β != one(β) && LinearAlgebra._rmul_or_fill!(C, β) + + @kernel function kernel_spmatmul!(C, @Const(A_colptr), @Const(A_rowval), @Const(A_nzval), @Const(B)) + k, col = @index(Global, NTuple) + + @inbounds axj = B[col, k] * α + @inbounds for j in A_colptr[col]:(A_colptr[col+1]-1) # nzrange(A, col) + KernelAbstractions.@atomic C[A_rowval[j], k] += A_nzval[j] * axj + end + end + + backend_C = KernelAbstractions.get_backend(C) + backend_A = KernelAbstractions.get_backend(A_nzval) + backend_B = KernelAbstractions.get_backend(B) + + backend_A == backend_B == backend_C || throw(ArgumentError("All arrays must be on the same backend")) + + kernel! 
= kernel_spmatmul!(backend_A) + kernel!(C, A_colptr, A_rowval, A_nzval, B; ndrange=(size(C, 2), size(A, 2))) + + return C +end From 8520f7f1e4485a4d6e2e97411759e79e48414406 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Mon, 16 Dec 2024 09:44:51 +0100 Subject: [PATCH 3/9] Remove Wrapped Arrays --- lib/GPUArraysCore/src/GPUArraysCore.jl | 56 +++++++++----------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/lib/GPUArraysCore/src/GPUArraysCore.jl b/lib/GPUArraysCore/src/GPUArraysCore.jl index 1df83012..a6237bbc 100644 --- a/lib/GPUArraysCore/src/GPUArraysCore.jl +++ b/lib/GPUArraysCore/src/GPUArraysCore.jl @@ -6,13 +6,11 @@ using SparseArrays ## essential types export AbstractGPUArray, AbstractGPUVector, AbstractGPUMatrix, AbstractGPUVecOrMat, - WrappedGPUArray, AnyGPUArray, AbstractGPUArrayStyle, - AnyGPUArray, AnyGPUVector, AnyGPUMatrix + WrappedGPUArray, AnyGPUArray, AbstractGPUArrayStyle, + AnyGPUArray, AnyGPUVector, AnyGPUMatrix export AbstractGPUSparseArray, AbstractGPUSparseMatrix, AbstractGPUSparseVector, AbstractGPUSparseVecOrMat, - WrappedGPUSparseArray, AnyGPUSparseArray, AnyGPUSparseVector, AnyGPUSparseMatrix, - AbstractGPUSparseMatrixCSC, AbstractGPUSparseMatrixCSR, AbstractGPUSparseMatrixCOO, - WrappedGPUSparseMatrixCSC, AnyGPUSparseMatrixCSC, WrappedGPUSparseMatrixCSR, AnyGPUSparseMatrixCSR, WrappedGPUSparseMatrixCOO, AnyGPUSparseMatrixCOO + AbstractGPUSparseMatrixCSC, AbstractGPUSparseMatrixCSR, AbstractGPUSparseMatrixCOO """ AbstractGPUArray{T, N} <: DenseArray{T, N} @@ -21,44 +19,30 @@ Supertype for `N`-dimensional GPU arrays (or array-like types) with elements of Instances of this type are expected to live on the host, see [`AbstractDeviceArray`](@ref) for device-side objects. """ -abstract type AbstractGPUArray{T, N} <: DenseArray{T, N} end +abstract type AbstractGPUArray{T,N} <: DenseArray{T,N} end -const AbstractGPUVector{T} = AbstractGPUArray{T, 1} -const AbstractGPUMatrix{T} = AbstractGPUArray{T, 2} -const AbstractGPUVecOrMat{T} = Union{AbstractGPUArray{T, 1}, AbstractGPUArray{T, 2}} +const AbstractGPUVector{T} = AbstractGPUArray{T,1} +const AbstractGPUMatrix{T} = AbstractGPUArray{T,2} +const AbstractGPUVecOrMat{T} = Union{AbstractGPUArray{T,1},AbstractGPUArray{T,2}} # convenience aliases for working with wrapped arrays const WrappedGPUArray{T,N} = WrappedArray{T,N,AbstractGPUArray,AbstractGPUArray{T,N}} -const AnyGPUArray{T,N} = Union{AbstractGPUArray{T,N}, WrappedGPUArray{T,N}} -const AnyGPUVector{T} = AnyGPUArray{T, 1} -const AnyGPUMatrix{T} = AnyGPUArray{T, 2} +const AnyGPUArray{T,N} = Union{AbstractGPUArray{T,N},WrappedGPUArray{T,N}} +const AnyGPUVector{T} = AnyGPUArray{T,1} +const AnyGPUMatrix{T} = AnyGPUArray{T,2} ## sparse arrays -abstract type AbstractGPUSparseArray{Tv, Ti, N} <: AbstractSparseArray{Tv, Ti, N} end +abstract type AbstractGPUSparseArray{Tv,Ti,N} <: AbstractSparseArray{Tv,Ti,N} end -const AbstractGPUSparseMatrix{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 2} -const AbstractGPUSparseVector{Tv, Ti} = AbstractGPUSparseArray{Tv, Ti, 1} -const AbstractGPUSparseVecOrMat{Tv, Ti} = Union{AbstractGPUSparseVector{Tv, Ti}, AbstractGPUSparseMatrix{Tv, Ti}} - -const WrappedGPUSparseArray{Tv, Ti, N} = WrappedArray{Tv,N,AbstractGPUSparseArray,AbstractGPUSparseArray{Tv, Ti, N}} -const AnyGPUSparseArray{Tv, Ti, N} = Union{AbstractGPUSparseArray{Tv, Ti, N}, WrappedGPUSparseArray{Tv, Ti, N}} -const AnyGPUSparseVector{Tv, Ti} = AnyGPUSparseArray{Tv, Ti, 1} -const AnyGPUSparseMatrix{Tv, Ti} = AnyGPUSparseArray{Tv, Ti, 2} +const 
AbstractGPUSparseMatrix{Tv,Ti} = AbstractGPUSparseArray{Tv,Ti,2} +const AbstractGPUSparseVector{Tv,Ti} = AbstractGPUSparseArray{Tv,Ti,1} +const AbstractGPUSparseVecOrMat{Tv,Ti} = Union{AbstractGPUSparseVector{Tv,Ti},AbstractGPUSparseMatrix{Tv,Ti}} abstract type AbstractGPUSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end abstract type AbstractGPUSparseMatrixCSR{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end abstract type AbstractGPUSparseMatrixCOO{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end -const WrappedGPUSparseMatrixCSC{Tv,Ti} = WrappedArray{Tv,AbstractGPUSparseMatrixCSC,AbstractGPUSparseMatrixCSC{Tv,Ti}} -const AnyGPUSparseMatrixCSC{Tv,Ti} = Union{AbstractGPUSparseMatrixCSC{Tv,Ti}, WrappedGPUSparseMatrixCSC{Tv,Ti}} - -const WrappedGPUSparseMatrixCSR{Tv,Ti} = WrappedArray{Tv,AbstractGPUSparseMatrixCSR,AbstractGPUSparseMatrixCSR{Tv,Ti}} -const AnyGPUSparseMatrixCSR{Tv,Ti} = Union{AbstractGPUSparseMatrixCSR{Tv,Ti}, WrappedGPUSparseMatrixCSR{Tv,Ti}} - -const WrappedGPUSparseMatrixCOO{Tv,Ti} = WrappedArray{Tv,AbstractGPUSparseMatrixCOO,AbstractGPUSparseMatrixCOO{Tv,Ti}} -const AnyGPUSparseMatrixCOO{Tv,Ti} = Union{AbstractGPUSparseMatrixCOO{Tv,Ti}, WrappedGPUSparseMatrixCOO{Tv,Ti}} - ## broadcasting """ @@ -187,9 +171,9 @@ end # this problem will be introduced in https://github.com/JuliaLang/julia/pull/39217 macro __tryfinally(ex, fin) Expr(:tryfinally, - :($(esc(ex))), - :($(esc(fin))) - ) + :($(esc(ex))), + :($(esc(fin))) + ) end """ @@ -212,7 +196,7 @@ end function allowscalar(allow::Bool=true) if allow @warn """It's not recommended to use allowscalar([true]) to allow scalar indexing. - Instead, use `allowscalar() do end` or `@allowscalar` to denote exactly which operations can use scalar operations.""" maxlog=1 + Instead, use `allowscalar() do end` or `@allowscalar` to denote exactly which operations can use scalar operations.""" maxlog = 1 end setting = allow ? ScalarAllowed : ScalarDisallowed task_local_storage(:ScalarIndexing, setting) @@ -234,8 +218,8 @@ macro allowscalar(ex) local tls_value = get(task_local_storage(), :ScalarIndexing, nothing) task_local_storage(:ScalarIndexing, ScalarAllowed) @__tryfinally($(esc(ex)), - isnothing(tls_value) ? delete!(task_local_storage(), :ScalarIndexing) - : task_local_storage(:ScalarIndexing, tls_value)) + isnothing(tls_value) ? 
delete!(task_local_storage(), :ScalarIndexing) + : task_local_storage(:ScalarIndexing, tls_value)) end end From 4d59b39590527cc740ca14643a8e1bdaeba10e2c Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Mon, 16 Dec 2024 14:59:49 +0100 Subject: [PATCH 4/9] Introduce sparse vector type --- lib/JLArrays/src/sparse.jl | 28 +++++++++++++++++++++++++++- src/GPUArrays.jl | 2 +- src/host/sparse.jl | 31 ++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/lib/JLArrays/src/sparse.jl b/lib/JLArrays/src/sparse.jl index b49bae53..578d902b 100644 --- a/lib/JLArrays/src/sparse.jl +++ b/lib/JLArrays/src/sparse.jl @@ -1,4 +1,29 @@ -export JLSparseMatrixCSC +export JLSparseVector, JLSparseMatrixCSC + +## Sparse Vector + +struct JLSparseVector{Tv,Ti<:Integer} <: AbstractGPUSparseVector{Tv,Ti} + n::Ti # Length of the sparse vector + nzind::JLVector{Ti} # Indices of stored values + nzval::JLVector{Tv} # Stored values, typically nonzeros + + function JLSparseVector{Tv,Ti}(n::Integer, nzind::JLVector{Ti}, nzval::JLVector{Tv}) where {Tv,Ti<:Integer} + n >= 0 || throw(ArgumentError("The number of elements must be non-negative.")) + length(nzind) == length(nzval) || + throw(ArgumentError("index and value vectors must be the same length")) + new(convert(Ti, n), nzind, nzval) + end +end + +JLSparseVector(n::Integer, nzind::JLVector{Ti}, nzval::JLVector{Tv}) where {Tv,Ti} = + JLSparseVector{Tv,Ti}(n, nzind, nzval) + +JLSparseVector(V::SparseVector) = JLSparseVector(V.n, JLVector(V.nzind), JLVector(V.nzval)) +SparseVector(V::JLSparseVector) = SparseVector(V.n, Vector(V.nzind), Vector(V.nzval)) + +Base.copy(V::JLSparseVector) = JLSparseVector(V.n, copy(V.nzind), copy(V.nzval)) + +## SparseMatrixCSC struct JLSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractGPUSparseMatrixCSC{Tv,Ti} m::Int # Number of rows @@ -29,5 +54,6 @@ function JLSparseMatrixCSC(m::Integer, n::Integer, colptr::JLVector, rowval::JLV end JLSparseMatrixCSC(A::SparseMatrixCSC) = JLSparseMatrixCSC(A.m, A.n, JLVector(A.colptr), JLVector(A.rowval), JLVector(A.nzval)) +SparseMatrixCSC(A::JLSparseMatrixCSC) = SparseMatrixCSC(A.m, A.n, Vector(A.colptr), Vector(A.rowval), Vector(A.nzval)) Base.copy(A::JLSparseMatrixCSC) = JLSparseMatrixCSC(A.m, A.n, copy(A.colptr), copy(A.rowval), copy(A.nzval)) diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index d62d9bd6..a47cd94d 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -5,7 +5,7 @@ using Serialization using Random using LinearAlgebra using SparseArrays -using SparseArrays: getcolptr, getrowval, getnzval +using SparseArrays: getcolptr, getrowval, getnzval, nonzeroinds using Printf diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 42a938c0..df558156 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,3 +1,32 @@ +## Sparse Vector + +Base.length(V::AbstractGPUSparseVector) = V.n +Base.size(V::AbstractGPUSparseVector) = (V.n,) + +SparseArrays.nonzeros(V::AbstractGPUSparseVector) = V.nzval +SparseArrays.getnzval(V::AbstractGPUSparseVector) = nonzeros(V) +SparseArrays.nnz(V::AbstractGPUSparseVector) = length(nzval(V)) +SparseArrays.nonzeroinds(V::AbstractGPUSparseVector) = V.nzind + +function Base.sizehint!(V::AbstractGPUSparseVector, newlen::Integer) + sizehint!(nonzeroinds(V), newlen) + sizehint!(nonzeros(V), newlen) + return V +end + +function LinearAlgebra.dot(x::AbstractGPUSparseVector, y::AbstractGPUVector) + n = length(y) + length(x) == n || throw(DimensionMismatch( + "Vector x has a length $(length(x)) but y has a length $n")) + nzind = 
nonzeroinds(x) + nzval = nonzeros(x) + y_view = y[nzind] # TODO: by using the view it throws scalar indexing + return dot(nzval, y_view) +end +LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where T<:Real = dot(y, x) +LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where T<:Complex = conj(dot(y, x)) + + ## General Sparse Matrix Base.size(A::AbstractGPUSparseMatrix) = (A.m, A.n) @@ -28,7 +57,7 @@ function _goodbuffers_csc(m, n, colptr, rowval, nzval) # TODO: also add the condition that colptr[end] - 1 == length(nzval) (allowscalar?) end -@inline function LinearAlgebra.mul!(C::AbstractGPUVector, A::AnyGPUSparseMatrixCSC, B::AbstractGPUVector, α::Number, β::Number) +@inline function LinearAlgebra.mul!(C::AbstractGPUVector, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVector, α::Number, β::Number) return LinearAlgebra.generic_matvecmul!(C, LinearAlgebra.wrapper_char(A), LinearAlgebra._unwrap(A), B, LinearAlgebra.MulAddMul(α, β)) end From b204d840572618d37fadcdace1e589972b8756e3 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Mon, 16 Dec 2024 19:04:14 +0100 Subject: [PATCH 5/9] Change the definition of the methods --- lib/JLArrays/src/sparse.jl | 13 +++++++++++++ src/host/sparse.jl | 27 ++++++++------------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/lib/JLArrays/src/sparse.jl b/lib/JLArrays/src/sparse.jl index 578d902b..b273f51b 100644 --- a/lib/JLArrays/src/sparse.jl +++ b/lib/JLArrays/src/sparse.jl @@ -23,6 +23,12 @@ SparseVector(V::JLSparseVector) = SparseVector(V.n, Vector(V.nzind), Vector(V.nz Base.copy(V::JLSparseVector) = JLSparseVector(V.n, copy(V.nzind), copy(V.nzval)) +Base.length(V::JLSparseVector) = V.n +Base.size(V::JLSparseVector) = (V.n,) + +SparseArrays.nonzeros(V::JLSparseVector) = V.nzval +SparseArrays.nonzeroinds(V::JLSparseVector) = V.nzind + ## SparseMatrixCSC struct JLSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractGPUSparseMatrixCSC{Tv,Ti} @@ -57,3 +63,10 @@ JLSparseMatrixCSC(A::SparseMatrixCSC) = JLSparseMatrixCSC(A.m, A.n, JLVector(A.c SparseMatrixCSC(A::JLSparseMatrixCSC) = SparseMatrixCSC(A.m, A.n, Vector(A.colptr), Vector(A.rowval), Vector(A.nzval)) Base.copy(A::JLSparseMatrixCSC) = JLSparseMatrixCSC(A.m, A.n, copy(A.colptr), copy(A.rowval), copy(A.nzval)) + +Base.size(A::JLSparseMatrixCSC) = (A.m, A.n) +Base.length(A::JLSparseMatrixCSC) = A.m * A.n + +SparseArrays.nonzeros(A::JLSparseMatrixCSC) = A.nzval +SparseArrays.getcolptr(A::JLSparseMatrixCSC) = A.colptr +SparseArrays.rowvals(A::JLSparseMatrixCSC) = A.rowval diff --git a/src/host/sparse.jl b/src/host/sparse.jl index df558156..d3769b21 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,12 +1,7 @@ ## Sparse Vector -Base.length(V::AbstractGPUSparseVector) = V.n -Base.size(V::AbstractGPUSparseVector) = (V.n,) - -SparseArrays.nonzeros(V::AbstractGPUSparseVector) = V.nzval SparseArrays.getnzval(V::AbstractGPUSparseVector) = nonzeros(V) SparseArrays.nnz(V::AbstractGPUSparseVector) = length(nzval(V)) -SparseArrays.nonzeroinds(V::AbstractGPUSparseVector) = V.nzind function Base.sizehint!(V::AbstractGPUSparseVector, newlen::Integer) sizehint!(nonzeroinds(V), newlen) @@ -29,26 +24,23 @@ LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where ## General Sparse Matrix -Base.size(A::AbstractGPUSparseMatrix) = (A.m, A.n) +KernelAbstractions.get_backend(A::AbstractGPUSparseMatrix) = KernelAbstractions.get_backend(getnzval(A)) -SparseArrays.nonzeros(A::AbstractGPUSparseMatrix) = A.nzval 
SparseArrays.getnzval(A::AbstractGPUSparseMatrix) = nonzeros(A) -SparseArrays.nnz(A::AbstractGPUSparseMatrix) = length(nzval(A)) +SparseArrays.nnz(A::AbstractGPUSparseMatrix) = length(getnzval(A)) function LinearAlgebra.rmul!(A::AbstractGPUSparseMatrix, x::Number) - rmul!(SparseArrays.getnzval(A), x) + rmul!(getnzval(A), x) return A end function LinearAlgebra.lmul!(x::Number, A::AbstractGPUSparseMatrix) - lmul!(x, SparseArrays.getnzval(A)) + lmul!(x, getnzval(A)) return A end ## CSC Matrix -SparseArrays.getcolptr(A::AbstractGPUSparseMatrixCSC) = A.colptr -SparseArrays.rowvals(A::AbstractGPUSparseMatrixCSC) = A.rowval SparseArrays.getrowval(A::AbstractGPUSparseMatrixCSC) = rowvals(A) # SparseArrays.nzrange(A::AbstractGPUSparseMatrixCSC, col::Integer) = getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # TODO: this uses scalar indexing @@ -81,22 +73,19 @@ function _spmatmul!(C::AbstractGPUVecOrMat, A::AbstractGPUSparseMatrixCSC, B::Ab size(B, 2) == size(C, 2) || throw(DimensionMismatch("second dimension of B, $(size(B,2)), does not match the second dimension of C, $(size(C,2))")) - A_colptr = getcolptr(A) - A_rowval = rowvals(A) - A_nzval = getnzval(A) β != one(β) && LinearAlgebra._rmul_or_fill!(C, β) - @kernel function kernel_spmatmul!(C, @Const(A_colptr), @Const(A_rowval), @Const(A_nzval), @Const(B)) + @kernel function kernel_spmatmul!(C, @Const(A), @Const(B)) k, col = @index(Global, NTuple) @inbounds axj = B[col, k] * α - @inbounds for j in A_colptr[col]:(A_colptr[col+1]-1) # nzrange(A, col) - KernelAbstractions.@atomic C[A_rowval[j], k] += A_nzval[j] * axj + @inbounds for j in getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # nzrange(A, col) + KernelAbstractions.@atomic C[rowvals(A)[j], k] += getnzval(A)[j] * axj end end backend_C = KernelAbstractions.get_backend(C) - backend_A = KernelAbstractions.get_backend(A_nzval) + backend_A = KernelAbstractions.get_backend(A) backend_B = KernelAbstractions.get_backend(B) backend_A == backend_B == backend_C || throw(ArgumentError("All arrays must be on the same backend")) From 3b93cda6e39de91d81dd1e8dc616d70eac115d12 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Tue, 17 Dec 2024 01:53:29 +0100 Subject: [PATCH 6/9] Add methods for transpose and adjoint --- lib/GPUArraysCore/Project.toml | 2 + lib/GPUArraysCore/src/GPUArraysCore.jl | 7 +- src/host/sparse.jl | 112 +++++++++++++++++-------- 3 files changed, 83 insertions(+), 38 deletions(-) diff --git a/lib/GPUArraysCore/Project.toml b/lib/GPUArraysCore/Project.toml index b47757ae..ba4b1ee4 100644 --- a/lib/GPUArraysCore/Project.toml +++ b/lib/GPUArraysCore/Project.toml @@ -5,9 +5,11 @@ version = "0.2.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] Adapt = "4.0" +LinearAlgebra = "1" SparseArrays = "1" julia = "1.6" diff --git a/lib/GPUArraysCore/src/GPUArraysCore.jl b/lib/GPUArraysCore/src/GPUArraysCore.jl index a6237bbc..409c8303 100644 --- a/lib/GPUArraysCore/src/GPUArraysCore.jl +++ b/lib/GPUArraysCore/src/GPUArraysCore.jl @@ -1,6 +1,7 @@ module GPUArraysCore using Adapt +using LinearAlgebra using SparseArrays ## essential types @@ -10,7 +11,7 @@ export AbstractGPUArray, AbstractGPUVector, AbstractGPUMatrix, AbstractGPUVecOrM AnyGPUArray, AnyGPUVector, AnyGPUMatrix export AbstractGPUSparseArray, AbstractGPUSparseMatrix, AbstractGPUSparseVector, AbstractGPUSparseVecOrMat, - AbstractGPUSparseMatrixCSC, AbstractGPUSparseMatrixCSR, AbstractGPUSparseMatrixCOO + 
AbstractGPUSparseMatrixCSC, AbstractGPUSparseMatrixCSR, AbstractGPUSparseMatrixCOO, AnyGPUSparseMatrixCSC, AnyGPUSparseMatrixCSR, AnyGPUSparseMatrixCOO """ AbstractGPUArray{T, N} <: DenseArray{T, N} @@ -43,6 +44,10 @@ abstract type AbstractGPUSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractGPUSparseMat abstract type AbstractGPUSparseMatrixCSR{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end abstract type AbstractGPUSparseMatrixCOO{Tv,Ti<:Integer} <: AbstractGPUSparseMatrix{Tv,Ti} end +const AnyGPUSparseMatrixCSC{Tv,Ti} = Union{AbstractGPUSparseMatrixCSC{Tv,Ti},Transpose{Tv,<:AbstractGPUSparseMatrixCSC{Tv,Ti}},Adjoint{Tv,<:AbstractGPUSparseMatrixCSC{Tv,Ti}}} +const AnyGPUSparseMatrixCSR{Tv,Ti} = Union{AbstractGPUSparseMatrixCSR{Tv,Ti},Transpose{Tv,<:AbstractGPUSparseMatrixCSR{Tv,Ti}},Adjoint{Tv,<:AbstractGPUSparseMatrixCSR{Tv,Ti}}} +const AnyGPUSparseMatrixCOO{Tv,Ti} = Union{AbstractGPUSparseMatrixCOO{Tv,Ti},Transpose{Tv,<:AbstractGPUSparseMatrixCOO{Tv,Ti}},Adjoint{Tv,<:AbstractGPUSparseMatrixCOO{Tv,Ti}}} + ## broadcasting """ diff --git a/src/host/sparse.jl b/src/host/sparse.jl index d3769b21..f7bdbcf3 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -1,3 +1,13 @@ +## Wrappers + +trans_adj_wrappers_dense_vecormat = ((T -> :(AbstractGPUVecOrMat{$T}), false, identity, identity), + (T -> :(Transpose{$T,<:AbstractGPUMatrix{$T}}), true, identity, A -> :(parent($A))), + (T -> :(Adjoint{$T,<:AbstractGPUMatrix{$T}}), true, x -> :(conj($x)), A -> :(parent($A)))) + +trans_adj_wrappers_csc = ((T -> :(AbstractGPUSparseMatrixCSC{$T}), false, identity, identity), + (T -> :(Transpose{$T,<:AbstractGPUSparseMatrixCSC{$T}}), true, identity, A -> :(parent($A))), + (T -> :(Adjoint{$T,<:AbstractGPUSparseMatrixCSC{$T}}), true, x -> :(conj($x)), A -> :(parent($A)))) + ## Sparse Vector SparseArrays.getnzval(V::AbstractGPUSparseVector) = nonzeros(V) @@ -18,8 +28,8 @@ function LinearAlgebra.dot(x::AbstractGPUSparseVector, y::AbstractGPUVector) y_view = y[nzind] # TODO: by using the view it throws scalar indexing return dot(nzval, y_view) end -LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where T<:Real = dot(y, x) -LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where T<:Complex = conj(dot(y, x)) +LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where {T<:Real} = dot(y, x) +LinearAlgebra.dot(x::AbstractGPUVector{T}, y::AbstractGPUSparseVector{T}) where {T<:Complex} = conj(dot(y, x)) ## General Sparse Matrix @@ -49,49 +59,77 @@ function _goodbuffers_csc(m, n, colptr, rowval, nzval) # TODO: also add the condition that colptr[end] - 1 == length(nzval) (allowscalar?) 
end -@inline function LinearAlgebra.mul!(C::AbstractGPUVector, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVector, α::Number, β::Number) - return LinearAlgebra.generic_matvecmul!(C, LinearAlgebra.wrapper_char(A), LinearAlgebra._unwrap(A), B, LinearAlgebra.MulAddMul(α, β)) -end +# @inline function LinearAlgebra.mul!(C::AbstractGPUVector, A::AnyGPUSparseMatrixCSC, B::AbstractGPUVector, α::Number, β::Number) +# return LinearAlgebra.generic_matvecmul!(C, LinearAlgebra.wrapper_char(A), LinearAlgebra._unwrap(A), B, LinearAlgebra.MulAddMul(α, β)) +# end @inline function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVector, _add::LinearAlgebra.MulAddMul) - return SparseArrays.spdensemul!(C, tA, 'N', A, B, _add) + return _spmatmul!(C, wrap(A, tA), B, _add.alpha, _add.beta) end -Base.@constprop :aggressive function SparseArrays.spdensemul!(C::AbstractGPUVecOrMat, tA, tB, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVecOrMat, _add::LinearAlgebra.MulAddMul) - if tA == 'N' - return _spmatmul!(C, A, wrap(B, tB), _add.alpha, _add.beta) - else - throw(ArgumentError("tA different from 'N' not yet supported")) - end +@inline function LinearAlgebra.generic_matmatmul!(C::AbstractGPUMatrix, tA, tb, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUMatrix, _add::LinearAlgebra.MulAddMul) + return _spmatmul!(C, wrap(A, tA), wrap(B, tb), _add.alpha, _add.beta) end -function _spmatmul!(C::AbstractGPUVecOrMat, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVecOrMat, α::Number, β::Number) - size(A, 2) == size(B, 1) || - throw(DimensionMismatch("second dimension of A, $(size(A,2)), does not match the first dimension of B, $(size(B,1))")) - size(A, 1) == size(C, 1) || - throw(DimensionMismatch("first dimension of A, $(size(A,1)), does not match the first dimension of C, $(size(C,1))")) - size(B, 2) == size(C, 2) || - throw(DimensionMismatch("second dimension of B, $(size(B,2)), does not match the second dimension of C, $(size(C,2))")) - - β != one(β) && LinearAlgebra._rmul_or_fill!(C, β) - - @kernel function kernel_spmatmul!(C, @Const(A), @Const(B)) - k, col = @index(Global, NTuple) - - @inbounds axj = B[col, k] * α - @inbounds for j in getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # nzrange(A, col) - KernelAbstractions.@atomic C[rowvals(A)[j], k] += getnzval(A)[j] * axj - end - end +for (wrapa, transa, opa, unwrapa) in trans_adj_wrappers_csc + for (wrapb, transb, opb, unwrapb) in trans_adj_wrappers_dense_vecormat + TypeA = wrapa(:(T1)) + TypeB = wrapb(:(T2)) + TypeC = :(AbstractGPUVecOrMat{T3}) + + kernel_spmatmul! = transa ? :kernel_spmatmul_T! : :kernel_spmatmul_N! + + indB = transb ? 
(i, j) -> :(($j, $i)) : (i, j) -> :(($i, $j)) # transpose indices + + @eval function _spmatmul!(C::$TypeC, A::$TypeA, B::$TypeB, α::Number, β::Number) where {T1,T2,T3} + size(A, 2) == size(B, 1) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), does not match the first dimension of B, $(size(B,1))")) + size(A, 1) == size(C, 1) || + throw(DimensionMismatch("first dimension of A, $(size(A,1)), does not match the first dimension of C, $(size(C,1))")) + size(B, 2) == size(C, 2) || + throw(DimensionMismatch("second dimension of B, $(size(B,2)), does not match the second dimension of C, $(size(C,2))")) - backend_C = KernelAbstractions.get_backend(C) - backend_A = KernelAbstractions.get_backend(A) - backend_B = KernelAbstractions.get_backend(B) + _A = $(unwrapa(:A)) + _B = $(unwrapb(:B)) - backend_A == backend_B == backend_C || throw(ArgumentError("All arrays must be on the same backend")) + backend_C = KernelAbstractions.get_backend(C) + backend_A = KernelAbstractions.get_backend(_A) + backend_B = KernelAbstractions.get_backend(_B) - kernel! = kernel_spmatmul!(backend_A) - kernel!(C, A_colptr, A_rowval, A_nzval, B; ndrange=(size(C, 2), size(A, 2))) + backend_A == backend_B == backend_C || throw(ArgumentError("All arrays must be on the same backend")) - return C + @kernel function kernel_spmatmul_N!(C, @Const(A_colptr), @Const(A_rowval), @Const(A_nzval), @Const(B)) + k, col = @index(Global, NTuple) + + Bi, Bj = $(indB(:col, :k)) + + @inbounds axj = $(opb(:(B[Bi, Bj]))) * α + @inbounds for j in A_colptr[col]:(A_colptr[col+1]-1) # nzrange(A, col) + KernelAbstractions.@atomic C[A_rowval[j], k] += $(opa(:(A_nzval[j]))) * axj + end + end + + @kernel function kernel_spmatmul_T!(C, @Const(A_colptr), @Const(A_rowval), @Const(A_nzval), @Const(B)) + k, col = @index(Global, NTuple) + + tmp = zero(eltype(C)) + @inbounds for j in A_colptr[col]:(A_colptr[col+1]-1) # nzrange(A, col) + Bi, Bj = $(indB(:(A_rowval[j]), :k)) + tmp += $(opa(:(A_nzval[j]))) * $(opb(:(B[Bi, Bj]))) + end + @inbounds C[col, k] += tmp * α + end + + β != one(β) && LinearAlgebra._rmul_or_fill!(C, β) + + A_colptr = getcolptr(_A) + A_rowval = getrowval(_A) + A_nzval = getnzval(_A) + + kernel! 
= $kernel_spmatmul!(backend_A) + kernel!(C, A_colptr, A_rowval, A_nzval, _B; ndrange=(size(C, 2), size(_A, 2))) + + return C + end + end end From ebca99d56270b807366cceac774e2afba993c9f1 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Tue, 17 Dec 2024 18:55:11 +0100 Subject: [PATCH 7/9] Add Device type for CSC matrix --- lib/JLArrays/src/JLArrays.jl | 1 + lib/JLArrays/src/sparse.jl | 23 +++++++++++++++++++++++ src/host/sparse.jl | 20 ++++++++------------ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl index a8666d2e..94418265 100644 --- a/lib/JLArrays/src/JLArrays.jl +++ b/lib/JLArrays/src/JLArrays.jl @@ -12,6 +12,7 @@ using GPUArrays using Adapt using SparseArrays +using SparseArrays: getcolptr, getrowval, getnzval, nonzeroinds import KernelAbstractions import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config diff --git a/lib/JLArrays/src/sparse.jl b/lib/JLArrays/src/sparse.jl index b273f51b..bf1f0bc8 100644 --- a/lib/JLArrays/src/sparse.jl +++ b/lib/JLArrays/src/sparse.jl @@ -70,3 +70,26 @@ Base.length(A::JLSparseMatrixCSC) = A.m * A.n SparseArrays.nonzeros(A::JLSparseMatrixCSC) = A.nzval SparseArrays.getcolptr(A::JLSparseMatrixCSC) = A.colptr SparseArrays.rowvals(A::JLSparseMatrixCSC) = A.rowval + +## Device + +function Adapt.adapt_structure(to, A::JLSparseMatrixCSC) + m = A.m + n = A.n + colptr = Adapt.adapt(to, getcolptr(A)) + rowval = Adapt.adapt(to, rowvals(A)) + nzval = Adapt.adapt(to, nonzeros(A)) + return JLSparseDeviceMatrixCSC(m, n, colptr, rowval, nzval) +end + +struct JLSparseDeviceMatrixCSC{Tv,Ti} <: AbstractGPUSparseMatrixCSC{Tv,Ti} + m::Int + n::Int + colptr::JLDeviceArray{Ti,1} + rowval::JLDeviceArray{Ti,1} + nzval::JLDeviceArray{Tv,1} +end + +SparseArrays.nonzeros(A::JLSparseDeviceMatrixCSC) = A.nzval +SparseArrays.getcolptr(A::JLSparseDeviceMatrixCSC) = A.colptr +SparseArrays.rowvals(A::JLSparseDeviceMatrixCSC) = A.rowval diff --git a/src/host/sparse.jl b/src/host/sparse.jl index f7bdbcf3..17789386 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -98,36 +98,32 @@ for (wrapa, transa, opa, unwrapa) in trans_adj_wrappers_csc backend_A == backend_B == backend_C || throw(ArgumentError("All arrays must be on the same backend")) - @kernel function kernel_spmatmul_N!(C, @Const(A_colptr), @Const(A_rowval), @Const(A_nzval), @Const(B)) + @kernel function kernel_spmatmul_N!(C, @Const(A), @Const(B)) k, col = @index(Global, NTuple) Bi, Bj = $(indB(:col, :k)) @inbounds axj = $(opb(:(B[Bi, Bj]))) * α - @inbounds for j in A_colptr[col]:(A_colptr[col+1]-1) # nzrange(A, col) - KernelAbstractions.@atomic C[A_rowval[j], k] += $(opa(:(A_nzval[j]))) * axj + @inbounds for j in getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # nzrange(A, col) + KernelAbstractions.@atomic C[getrowval(A)[j], k] += $(opa(:(getnzval(A)[j]))) * axj end end - @kernel function kernel_spmatmul_T!(C, @Const(A_colptr), @Const(A_rowval), @Const(A_nzval), @Const(B)) + @kernel function kernel_spmatmul_T!(C, @Const(A), @Const(B)) k, col = @index(Global, NTuple) tmp = zero(eltype(C)) - @inbounds for j in A_colptr[col]:(A_colptr[col+1]-1) # nzrange(A, col) - Bi, Bj = $(indB(:(A_rowval[j]), :k)) - tmp += $(opa(:(A_nzval[j]))) * $(opb(:(B[Bi, Bj]))) + @inbounds for j in getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # nzrange(A, col) + Bi, Bj = $(indB(:(getrowval(A)[j]), :k)) + tmp += $(opa(:(getnzval(A)[j]))) * $(opb(:(B[Bi, Bj]))) end @inbounds C[col, k] += tmp * α end β 
!= one(β) && LinearAlgebra._rmul_or_fill!(C, β) - A_colptr = getcolptr(_A) - A_rowval = getrowval(_A) - A_nzval = getnzval(_A) - kernel! = $kernel_spmatmul!(backend_A) - kernel!(C, A_colptr, A_rowval, A_nzval, _B; ndrange=(size(C, 2), size(_A, 2))) + kernel!(C, _A, _B; ndrange=(size(C, 2), size(_A, 2))) return C end From 2b85d054fa2e109971321bed33be9c27af61bafe Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Sun, 22 Dec 2024 16:33:15 +0100 Subject: [PATCH 8/9] Add Base operations --- src/host/sparse.jl | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index 17789386..a4380921 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -13,12 +13,25 @@ trans_adj_wrappers_csc = ((T -> :(AbstractGPUSparseMatrixCSC{$T}), false, identi SparseArrays.getnzval(V::AbstractGPUSparseVector) = nonzeros(V) SparseArrays.nnz(V::AbstractGPUSparseVector) = length(nzval(V)) +function unsafe_free!(V::AbstractGPUSparseVector) + unsafe_free!(nonzeroinds(V)) + unsafe_free!(nonzeros(V)) + return nothing +end + function Base.sizehint!(V::AbstractGPUSparseVector, newlen::Integer) sizehint!(nonzeroinds(V), newlen) sizehint!(nonzeros(V), newlen) return V end +Base.copy(V::AbstractGPUSparseVector) = typeof(V)(length(V), copy(nonzeroinds(V)), copy(nonzeros(V))) +Base.similar(V::AbstractGPUSparseVector) = copy(V) # We keep the same sparsity of the source + +Base.:(*)(α::Number, V::AbstractGPUSparseVector) = typeof(V)(length(V), copy(nonzeroinds(V)), α * nonzeros(V)) +Base.:(*)(V::AbstractGPUSparseVector, α::Number) = α * V +Base.:(/)(V::AbstractGPUSparseVector, α::Number) = typeof(V)(length(V), copy(nonzeroinds(V)), nonzeros(V) / α) + function LinearAlgebra.dot(x::AbstractGPUSparseVector, y::AbstractGPUVector) n = length(y) length(x) == n || throw(DimensionMismatch( @@ -54,14 +67,19 @@ end SparseArrays.getrowval(A::AbstractGPUSparseMatrixCSC) = rowvals(A) # SparseArrays.nzrange(A::AbstractGPUSparseMatrixCSC, col::Integer) = getcolptr(A)[col]:(getcolptr(A)[col+1]-1) # TODO: this uses scalar indexing -function _goodbuffers_csc(m, n, colptr, rowval, nzval) - return (length(colptr) == n + 1 && length(rowval) == length(nzval)) - # TODO: also add the condition that colptr[end] - 1 == length(nzval) (allowscalar?) 
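+# Eagerly release the colptr/rowval/nzval buffers of a CSC matrix; the
+# wrapper must not be touched afterwards. Mirrors `unsafe_free!` for the
+# sparse vector case above.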
+function unsafe_free!(A::AbstractGPUSparseMatrixCSC) + unsafe_free!(getcolptr(A)) + unsafe_free!(rowvals(A)) + unsafe_free!(nonzeros(A)) + return nothing end -# @inline function LinearAlgebra.mul!(C::AbstractGPUVector, A::AnyGPUSparseMatrixCSC, B::AbstractGPUVector, α::Number, β::Number) -# return LinearAlgebra.generic_matvecmul!(C, LinearAlgebra.wrapper_char(A), LinearAlgebra._unwrap(A), B, LinearAlgebra.MulAddMul(α, β)) -# end +Base.copy(A::AbstractGPUSparseMatrixCSC) = typeof(A)(size(A), copy(getcolptr(A)), copy(rowvals(A)), copy(getnzval(A))) +Base.similar(A::AbstractGPUSparseMatrixCSC) = copy(A) # We keep the same sparsity of the source + +Base.:(*)(α::Number, A::AbstractGPUSparseMatrixCSC) = typeof(A)(size(A), copy(getcolptr(A)), copy(rowvals(A)), α * nonzeros(A)) +Base.:(*)(A::AbstractGPUSparseMatrixCSC, α::Number) = α * A +Base.:(/)(A::AbstractGPUSparseMatrixCSC, α::Number) = typeof(A)(size(A), copy(getcolptr(A)), copy(rowvals(A)), nonzeros(A) / α) @inline function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA, A::AbstractGPUSparseMatrixCSC, B::AbstractGPUVector, _add::LinearAlgebra.MulAddMul) return _spmatmul!(C, wrap(A, tA), B, _add.alpha, _add.beta) @@ -129,3 +147,8 @@ for (wrapa, transa, opa, unwrapa) in trans_adj_wrappers_csc end end end + +function _goodbuffers_csc(m, n, colptr, rowval, nzval) + return (length(colptr) == n + 1 && length(rowval) == length(nzval)) + # TODO: also add the condition that colptr[end] - 1 == length(nzval) (allowscalar?) +end From b3bc304e2ad654d8f42f029fa6e6567471872190 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Sun, 22 Dec 2024 16:33:29 +0100 Subject: [PATCH 9/9] Add Broadcasting --- src/host/sparse.jl | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/host/sparse.jl b/src/host/sparse.jl index a4380921..01bad273 100644 --- a/src/host/sparse.jl +++ b/src/host/sparse.jl @@ -152,3 +152,30 @@ function _goodbuffers_csc(m, n, colptr, rowval, nzval) return (length(colptr) == n + 1 && length(rowval) == length(nzval)) # TODO: also add the condition that colptr[end] - 1 == length(nzval) (allowscalar?) end + +## Broadcasting + +# broadcast container type promotion for combinations of sparse arrays and other types +struct GPUSparseVecStyle <: Broadcast.AbstractArrayStyle{1} end +struct GPUSparseMatStyle <: Broadcast.AbstractArrayStyle{2} end +Broadcast.BroadcastStyle(::Type{<:AbstractGPUSparseVector}) = GPUSparseVecStyle() +Broadcast.BroadcastStyle(::Type{<:AbstractGPUSparseMatrix}) = GPUSparseMatStyle() +const SPVM = Union{GPUSparseVecStyle,GPUSparseMatStyle} + +# GPUSparseVecStyle handles 0-1 dimensions, GPUSparseMatStyle 0-2 dimensions. +# GPUSparseVecStyle promotes to GPUSparseMatStyle for 2 dimensions. +# Fall back to DefaultArrayStyle for higher dimensionality. 
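+# E.g. a GPUSparseVecStyle operand combined with a dense `Matrix`
+# (DefaultArrayStyle{2}) resolves via `GPUSparseVecStyle(Val(2))` to
+# GPUSparseMatStyle, while a 3-d (or higher) operand gives back the dense
+# `Broadcast.DefaultArrayStyle{N}()`.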
+GPUSparseVecStyle(::Val{0}) = GPUSparseVecStyle() +GPUSparseVecStyle(::Val{1}) = GPUSparseVecStyle() +GPUSparseVecStyle(::Val{2}) = GPUSparseMatStyle() +GPUSparseVecStyle(::Val{N}) where N = Broadcast.DefaultArrayStyle{N}() +GPUSparseMatStyle(::Val{0}) = GPUSparseMatStyle() +GPUSparseMatStyle(::Val{1}) = GPUSparseMatStyle() +GPUSparseMatStyle(::Val{2}) = GPUSparseMatStyle() +GPUSparseMatStyle(::Val{N}) where N = Broadcast.DefaultArrayStyle{N}() + +Broadcast.BroadcastStyle(::GPUSparseMatStyle, ::GPUSparseVecStyle) = GPUSparseMatStyle() + +# Tuples promote to dense +Broadcast.BroadcastStyle(::GPUSparseVecStyle, ::Broadcast.Style{Tuple}) = Broadcast.DefaultArrayStyle{1}() +Broadcast.BroadcastStyle(::GPUSparseMatStyle, ::Broadcast.Style{Tuple}) = Broadcast.DefaultArrayStyle{2}()
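
Taken together, the series is enough for a JLArrays-backed CSC matrix to go
through the generic LinearAlgebra entry points. A minimal host-side sketch of
the intended usage follows (a sketch only: it assumes the JLSparseMatrixCSC
constructor and the generic_matvecmul!/generic_matmatmul! overloads from the
patches above, and that the installed LinearAlgebra version routes
five-argument mul! through them):

    using LinearAlgebra, SparseArrays
    using JLArrays

    A  = sprand(100, 80, 0.1)       # host SparseMatrixCSC
    dA = JLSparseMatrixCSC(A)       # JLArray-backed device copy
    x  = JLArray(rand(80))
    y  = JLArray(zeros(100))

    mul!(y, dA, x, 2.0, 0.0)        # y = 2*A*x, runs kernel_spmatmul_N!
    @assert Array(y) ≈ 2 .* (A * Array(x))

    mul!(x, transpose(dA), y)       # x = Aᵀ*y, runs kernel_spmatmul_T!
    @assert Array(x) ≈ transpose(A) * Array(y)

Round-tripping back to the CPU uses the SparseMatrixCSC(::JLSparseMatrixCSC)
convenience constructor added in patch 4.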