Merge pull request #41 from kshyatt/ksh/ganex

yuehhua · web-flow · commit 713180385278 · 2020-05-02T00:57:18.000+08:00
Some updates to get the GCN example working...
diff --git a/examples/gat.jl b/examples/gat.jl
@@ -0,0 +1,46 @@
+using GeometricFlux
+using Flux
+using Flux: onehotbatch, onecold, crossentropy, throttle
+using JLD2  # use v0.1.2
+using Statistics: mean
+using SparseArrays
+using LightGraphs.SimpleGraphs
+using CuArrays
+
+@load "data/cora_features.jld2" features
+@load "data/cora_labels.jld2" labels
+@load "data/cora_graph.jld2" g
+
+num_nodes = 2708
+num_features = 1433
+
+heads  = 8
+hidden = 8
+target_catg = 7
+epochs = 10
+
+## Preprocessing data
+train_X = features |> gpu  # dim: num_features * num_nodes
+train_y = labels |> gpu  # dim: target_catg * num_nodes
+
+## Model
+model = Chain(GATConv(g, num_features=>hidden, heads=heads),
+              Dropout(0.6),
+              GATConv(g, hidden=>target_catg, heads=heads),
+              softmax) |> gpu
+# test model
+# model(train_X)
+
+## Loss
+loss(x, y) = crossentropy(model(x), y)
+accuracy(x, y) = mean(onecold(model(x)) .== onecold(y))
+
+## Training
+ps = Flux.params(model)
+train_data = [(train_X, train_y)]
+opt = ADAM(0.01)
+evalcb() = @show(accuracy(train_X, train_y))
+
+for i = 1:epochs
+    Flux.train!(loss, ps, train_data, opt, cb=throttle(evalcb, 10))
+end
diff --git a/examples/gcn.jl b/examples/gcn.jl
@@ -5,6 +5,7 @@ using JLD2  # use v0.1.2
 using Statistics: mean
 using SparseArrays
 using LightGraphs.SimpleGraphs
+using LightGraphs: adjacency_matrix
 using CuArrays
 
 @load "data/cora_features.jld2" features
@@ -15,19 +16,19 @@ num_nodes = 2708
 num_features = 1433
 hidden = 16
 target_catg = 7
-epochs = 10
+epochs = 20
 
 ## Preprocessing data
-train_X = features |> gpu  # dim: num_features * num_nodes
-train_y = labels |> gpu  # dim: target_catg * num_nodes
+train_X = Float32.(features) |> gpu  # dim: num_features * num_nodes
+train_y = Float32.(labels) |> gpu  # dim: target_catg * num_nodes
+
+adj_mat = Matrix{Float32}(adjacency_matrix(g)) |> gpu
 
 ## Model
-model = Chain(GCNConv(g, num_features=>hidden, relu),
+model = Chain(GCNConv(adj_mat, num_features=>hidden, relu),
               Dropout(0.5),
-              GCNConv(g, hidden=>target_catg),
+              GCNConv(adj_mat, hidden=>target_catg),
               softmax) |> gpu
-# test model
-# model(train_X)
 
 ## Loss
 loss(x, y) = crossentropy(model(x), y)
diff --git a/src/graph/simplegraphs.jl b/src/graph/simplegraphs.jl
@@ -1,5 +1,5 @@
-using LightGraphs: AbstractSimpleGraph, nv, adjacency_matrix
-
+using LightGraphs: AbstractSimpleGraph, nv, adjacency_matrix, inneighbors, outneighbors, all_neighbors
+import LightGraphs: adjacency_matrix
 
 ## Linear algebra API for AbstractSimpleGraph
 
@@ -19,12 +19,7 @@ function laplacian_matrix(sg::AbstractSimpleGraph, T::DataType=eltype(sg); dir::
     laplacian_matrix(adjacency_matrix(sg, T; dir=dir), T; dir=dir)
 end
 
-function normalized_laplacian(sg::AbstractSimpleGraph, T::DataType=eltype(sg); selfloop::Bool=false)
-    adj = adjacency_matrix(sg, T)
-    selfloop && (adj += I)
-    normalized_laplacian(adj, T)
-end
-
+adjacency_matrix(sg::Base.RefValue{<:AbstractSimpleGraph}, T::DataType=eltype(sg[])) = adjacency_matrix(sg[], T)  # unwrap before eltype: eltype(::RefValue) is the wrapped graph type, not the graph's element type
 
 ## Convolution layers accepting AbstractSimpleGraph
 
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
@@ -26,22 +26,24 @@ end
 
 function GCNConv(ch::Pair{<:Integer,<:Integer}, σ = identity;
                  init=glorot_uniform, T::DataType=Float32, bias::Bool=true, cache::Bool=true)
-    b = bias ? init(ch[2]) : zeros(T, ch[2])
+    b = bias ? T.(init(ch[2])) : zeros(T, ch[2])
     graph = cache ? FeaturedGraph(nothing, nothing) : NullGraph()
-    GCNConv(init(ch[2], ch[1]), b, σ, graph)
+    GCNConv(T.(init(ch[2], ch[1])), b, σ, graph)
 end
 
 function GCNConv(adj::AbstractMatrix, ch::Pair{<:Integer,<:Integer}, σ = identity;
                  init=glorot_uniform, T::DataType=Float32, bias::Bool=true, cache::Bool=true)
-    b = bias ? init(ch[2]) : zeros(T, ch[2])
+    b = bias ? T.(init(ch[2])) : zeros(T, ch[2])
     graph = cache ? FeaturedGraph(adj, nothing) : NullGraph()
-    GCNConv(init(ch[2], ch[1]), b, σ, graph)
+    GCNConv(T.(init(ch[2], ch[1])), b, σ, graph)
 end
 
 @functor GCNConv
 
 function (g::GCNConv)(X::AbstractMatrix{T}) where {T}
-    g.σ.(g.weight * X * normalized_laplacian(graph(g.graph), T; selfloop=true) .+ g.bias)
+    W, b, σ = g.weight, g.bias, g.σ
+    nl = normalized_laplacian(graph(g.graph), float(T); selfloop=true)
+    σ.(W * X * nl .+ b)
 end
 
 function (g::GCNConv)(fg::FeaturedGraph)
diff --git a/src/linalg.jl b/src/linalg.jl
@@ -1,4 +1,5 @@
 ## Linear algebra API for adjacency matrix
+using LinearAlgebra
 
 """
     degrees(g[, T; dir=:out])
@@ -82,8 +83,8 @@ The values other than diagonal are zeros.
 - `dir`: direction of degree; should be `:in`, `:out`, or `:both` (optional).
 """
 function inv_sqrt_degree_matrix(adj::AbstractMatrix, T::DataType=eltype(adj); dir::Symbol=:out)
-    d = degrees(adj, T, dir=dir).^(-0.5)
-    return SparseMatrixCSC(T.(diagm(0=>d)))
+    d  = inv.(sqrt.(degrees(adj, T, dir=dir)))
+    return Diagonal(d)
 end
 
 """
@@ -110,10 +111,16 @@ Normalized Laplacian matrix of graph `g`.
 - `T`: result element type of degree vector; default is the element type of `g` (optional).
 - `selfloop`: adding self loop while calculating the matrix (optional).
 """
+function normalized_laplacian(sg, T::DataType=eltype(sg); selfloop::Bool=false)
+    adj = adjacency_matrix(sg, T)
+    selfloop && (adj += I)
+    normalized_laplacian(adj, T)
+end
+
 function normalized_laplacian(adj::AbstractMatrix, T::DataType=eltype(adj); selfloop::Bool=false)
     selfloop && (adj += I)
     inv_sqrtD = inv_sqrt_degree_matrix(adj, T, dir=:both)
-    I - inv_sqrtD * SparseMatrixCSC(T.(adj)) * inv_sqrtD
+    I - inv_sqrtD * adj * inv_sqrtD
 end
 
 function neighbors(adj::AbstractMatrix, T::DataType=eltype(adj))