FluxML · yuehhua · Jul 22, 2020 · Jul 15, 2020 · Jul 15, 2020 · Jul 15, 2020
diff --git a/examples/gat.jl b/examples/gat.jl
@@ -1,46 +1,49 @@
 using GeometricFlux
 using Flux
-using Flux: onehotbatch, onecold, crossentropy, throttle
+using Flux: onehotbatch, onecold, logitcrossentropy, throttle
+using Flux: @epochs
 using JLD2  # use v0.1.2
 using Statistics: mean
 using SparseArrays
 using LightGraphs.SimpleGraphs
-using CuArrays
+using LightGraphs: adjacency_matrix
+using CUDA
 
 @load "data/cora_features.jld2" features
 @load "data/cora_labels.jld2" labels
 @load "data/cora_graph.jld2" g
 
 num_nodes = 2708
 num_features = 1433
-
 heads  = 8
 hidden = 8
 target_catg = 7
 epochs = 10
 
 ## Preprocessing data
-train_X = features |> gpu  # dim: num_features * num_nodes
-train_y = labels |> gpu  # dim: target_catg * num_nodes
+train_X = Float32.(features) |> gpu  # dim: num_features * num_nodes
+train_y = Float32.(labels) |> gpu  # dim: target_catg * num_nodes
+adj_mat = Matrix{Float32}(adjacency_matrix(g)) |> gpu
 
 ## Model
 model = Chain(GATConv(g, num_features=>hidden, heads=heads),
               Dropout(0.6),
-              GATConv(g, hidden=>target_catg, heads=heads),
-              softmax) |> gpu
+              # no trailing softmax: logitcrossentropy expects raw logits
+              GATConv(g, hidden*heads=>target_catg, heads=heads, concat=false)) |> gpu
 # test model
 # model(train_X)
 
 ## Loss
-loss(x, y) = crossentropy(model(x), y)
+loss(x, y) = logitcrossentropy(model(x), y)
 accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
 
+# test gradient
+# gradient(X -> loss(X, train_y), train_X)
+
 ## Training
 ps = Flux.params(model)
 train_data = [(train_X, train_y)]
-opt = ADAM(0.01)
+opt = ADAM(0.05)
 evalcb() = @show(accuracy(train_X, train_y))
 
-for i = 1:epochs
-    Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, 10))
-end
+@epochs epochs Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, 10))
diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
@@ -3,7 +3,7 @@ module GeometricFlux
 using Statistics: mean
 using StaticArrays: StaticArray
 using SparseArrays: SparseMatrixCSC
-using LinearAlgebra: I, issymmetric, diagm, eigmax, norm, Adjoint, Diagonal
+using LinearAlgebra: I, issymmetric, diagm, eigmax, norm, Adjoint, Diagonal, eigen, Symmetric
 
 using Requires
 using DataStructures: DefaultDict

diff --git a/src/graph/index.jl b/src/graph/index.jl
@@ -27,9 +27,8 @@ value is accumulated numbers of edge (current vertex not included).
 """
 function accumulated_edges(adj::AbstractVector{<:AbstractVector{<:Integer}},
                            num_V=size(adj,1))
-    y = similar(adj[1], num_V+1)
-    y .= 0, cumsum(map(length, adj))...
-    y
+    # vcat prepends the leading 0 without splatting a vector of unknown length
+    return vcat(0, cumsum(map(length, adj)))
 end
 
 Zygote.@nograd accumulated_edges

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
@@ -111,25 +111,18 @@ end
 @functor ChebConv
 
 function (c::ChebConv)(L̃::AbstractMatrix{S}, X::AbstractMatrix{T}) where {S<:Real, T<:Real}
-    fin = c.in_channel
-    @assert size(X, 1) == fin "Input feature size must match input channel size."
-    N = size(L̃, 1)
-    @assert size(X, 2) == N "Input vertex number must match Laplacian matrix size."
-    fout = c.out_channel
-
-    Z = similar(X, fin, N, c.k)
-    view(Z,:,:,1) .= X
-    view(Z,:,:,2) .= X * L̃
-    for k = 3:c.k
-        view(Z,:,:,k) .= 2*view(Z, :, :, k-1)*L̃ - view(Z, :, :, k-2)
-    end
+    @assert size(X, 1) == c.in_channel "Input feature size must match input channel size."
+    @assert size(X, 2) == size(L̃, 1) "Input vertex number must match Laplacian matrix size."
 
-    Y = view(c.weight, :, :, 1) * view(Z, :, :, 1)
-    for k = 2:c.k
-        Y += view(c.weight, :, :, k) * view(Z, :, :, k)
+    # Z_prev and Z carry the two latest recurrence terms (X, then X*L̃)
+    Z_prev, Z = X, X * L̃
+    Y = view(c.weight,:,:,1) * Z_prev
+    c.k >= 2 && (Y += view(c.weight,:,:,2) * Z)  # k == 1: presumably no 2nd slice
+    for k = 3:c.k
+        Z, Z_prev = 2*Z*L̃ - Z_prev, Z
+        Y += view(c.weight,:,:,k) * Z
     end
-    Y .+= c.bias
-    return Y
+    return Y .+ c.bias
 end
 
 function (c::ChebConv)(X::AbstractMatrix{T}) where {T<:Real}
@@ -286,9 +279,8 @@ function message(g::GATConv, x_i::AbstractVector, x_j::AbstractVector, e_ij)
     α = vcat(x_i, x_j+zero(x_j)) .* g.a
     α = reshape(sum(α, dims=1), g.heads)
     α = leakyrelu.(α, g.negative_slope)
-    α = _softmax(α)
-    x_j .*= reshape(α, 1, g.heads)
-    reshape(x_j, n*g.heads)
+    α = Flux.softmax(α)
+    reshape(x_j .* reshape(α, 1, g.heads), n*g.heads)
 end
 
 # The same as update function in batch manner
@@ -305,13 +297,6 @@ function (g::GATConv)(X::AbstractMatrix)
 end
 (g::GATConv)(fg::FeaturedGraph) = propagate(g, fg, :add)
 
-
-function _softmax(xs)
-    xs = exp.(xs)
-    s = sum(xs, dims=2)
-    return xs ./ s
-end
-
 function Base.show(io::IO, l::GATConv)
     in_channel = size(l.weight, ndims(l.weight))
     out_channel = size(l.weight, ndims(l.weight)-1)

diff --git a/src/operations/linalg.jl b/src/operations/linalg.jl
@@ -145,5 +145,7 @@ defined as ``\hat{L} = \frac{2}{\lambda_{max}} L - I`` where ``L`` is the normal
 - `T`: result element type of degree vector; default is the element type of `g` (optional).
 """
 function scaled_laplacian(adj::AbstractMatrix, T::DataType=eltype(adj))
-    T(2. / eigmax(adj)) * normalized_laplacian(adj, T) - I
+    @assert issymmetric(adj) "scaled_laplacian only works with symmetric matrices"
+    λ_max = maximum(eigen(Symmetric(adj)).values)  # largest eigenvalue of adj
+    return T(2. / λ_max) * normalized_laplacian(adj, T) - I
 end