diff --git a/Manifest.toml b/Manifest.toml index 81e42d5d5..ade092b24 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -14,9 +14,9 @@ version = "0.3.3" [[Adapt]] deps = ["LinearAlgebra"] -git-tree-sha1 = "0fac443759fa829ed8066db6cf1077d888bb6573" +git-tree-sha1 = "95f8bda0555209f122bc796b0382ea4a3a121720" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -version = "2.0.2" +version = "2.1.0" [[ArnoldiMethod]] deps = ["DelimitedFiles", "LinearAlgebra", "Random", "SparseArrays", "StaticArrays", "Test"] @@ -25,10 +25,10 @@ uuid = "ec485272-7323-5ecc-a04f-4719b315124d" version = "0.0.4" [[ArrayLayouts]] -deps = ["FillArrays", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "bd09f450716f55c5a47b24de277a8825e2450729" +deps = ["Compat", "FillArrays", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "d6f1ecde467019346f7601fb2ee58cb2bc60d121" uuid = "4c555306-a7a7-4459-81d9-ec55ddd5c99a" -version = "0.4.7" +version = "0.4.8" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -62,23 +62,35 @@ git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" version = "0.4.1" +[[CSV]] +deps = ["CategoricalArrays", "DataFrames", "Dates", "Mmap", "Parsers", "PooledArrays", "SentinelArrays", "Tables", "Unicode"] +git-tree-sha1 = "a390152e6850405a48ca51bd7ca33d11a21d6230" +uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" +version = "0.7.7" + [[CUDA]] deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] git-tree-sha1 = "83bfd180e2f842f6d4ee315a6db8665e9aa0c19b" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" version = "1.3.3" +[[CategoricalArrays]] +deps = ["DataAPI", "Future", "JSON", "Missings", "Printf", "Statistics", "StructTypes", "Unicode"] +git-tree-sha1 = "2ac27f59196a68070e132b25713f9a5bbc5fa0d2" +uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" +version = "0.8.3" + [[ChainRules]] deps = ["ChainRulesCore", "LinearAlgebra", "Random", "Reexport", "Requires", "Statistics"] -git-tree-sha1 = "dded7909a68d11a813a05bd70a7e728093742b56" +git-tree-sha1 = "60b76639ff1dc573b0708a68924539d03ed6520b" uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" -version = "0.7.17" +version = "0.7.20" [[ChainRulesCore]] -deps = ["MuladdMacro"] -git-tree-sha1 = "8013d73583b79df2b5d8fc71e3c43f9246477fea" +deps = ["LinearAlgebra", "MuladdMacro"] +git-tree-sha1 = "ac64a416997ae87eb86550020d0607ff608253d1" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "0.9.7" +version = "0.9.10" [[CodecZlib]] deps = ["TranscodingStreams", "Zlib_jll"] @@ -106,9 +118,9 @@ version = "0.3.0" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "215f1c81cfd1c5416cd78740bff8ef59b24cd7c0" +git-tree-sha1 = "7c7f4cda0d58ec999189d70f5ee500348c4b4df1" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.15.0" +version = "3.16.0" [[CompilerSupportLibraries_jll]] deps = ["Libdl", "Pkg"] @@ -139,12 +151,23 @@ git-tree-sha1 = "f2be642d7a94e7f0cabcd2106fee4c6715d452d1" uuid = "124859b0-ceae-595e-8997-d05f6a7a8dfe" version = "0.7.2" +[[DataFrames]] +deps = ["CategoricalArrays", "Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] +git-tree-sha1 = "a7c1c9a6e47a92321bbc9d500dab9b04cc4a6a39" +uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +version = "0.21.7" + [[DataStructures]] deps = ["InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "88d48e133e6d3dd68183309877eac74393daa7eb" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" version = "0.17.20" +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + [[Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" @@ -180,12 +203,6 @@ git-tree-sha1 = "7fce513fcda766962ff67c5596cb16c463dfd371" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.2" -[[FileIO]] -deps = ["Pkg"] -git-tree-sha1 = "992b4aeb62f99b69fcf0cb2085094494cc05dfb3" -uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.4.3" - [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays"] git-tree-sha1 = "b955c227b0d1413a1a97e2ca0635a5de019d7337" @@ -222,9 +239,9 @@ uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" [[GPUArrays]] deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"] -git-tree-sha1 = "600f45500060894487832c2f00c203fe3e0cb264" +git-tree-sha1 = "da6398282abd2a8c0dc3e55b49d984fcc2c582e5" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "5.1.0" +version = "5.2.1" [[GPUCompiler]] deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"] @@ -238,6 +255,12 @@ git-tree-sha1 = "83881eb8eda839992f6310d9c4514b7ffb5498fe" uuid = "a1251efa-393a-423f-9d7b-faaecba535dc" version = "0.1.0" +[[GraphMLDatasets]] +deps = ["CSV", "DataDeps", "HTTP", "JLD2", "JSON", "LightGraphs", "MAT", "PyCall", "SparseArrays"] +git-tree-sha1 = "dbde5c2fc53b41542d10fa16490520e7c58ec39e" +uuid = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8" +version = "0.1.0" + [[GraphSignals]] deps = ["GraphLaplacians", "LightGraphs", "LinearAlgebra", "SimpleWeightedGraphs", "Zygote"] git-tree-sha1 = "6e62e16c779458412951a71f4d535f05a1e0bb89" @@ -252,15 +275,15 @@ version = "0.13.6" [[HDF5_jll]] deps = ["Libdl", "Pkg", "Zlib_jll"] -git-tree-sha1 = "85bd2e586a10ae0eab856125bf5245e0d36384a7" +git-tree-sha1 = "3dbc683172cb53428907485a4bb98a29d3874083" uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" -version = "1.10.5+5" +version = "1.10.5+6" [[HTTP]] deps = ["Base64", "Dates", "IniFile", "MbedTLS", "Sockets"] -git-tree-sha1 = "2ac03263ce44be4222342bca1c51c36ce7566161" +git-tree-sha1 = "c7ec02c4c6a039a98a15f955462cd7aea5df4508" uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "0.8.17" +version = "0.8.19" [[IRTools]] deps = ["InteractiveUtils", "MacroTools", "Test"] @@ -283,11 +306,22 @@ version = "0.5.0" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +[[InvertedIndices]] +deps = ["Test"] +git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" +uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" +version = "1.0.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + [[JLD2]] -deps = ["CodecZlib", "DataStructures", "FileIO", "Mmap", "Pkg", "Printf", "UUIDs"] -git-tree-sha1 = "9353b717ee4e27beab4e902c92a06bb5f160d2cf" +deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] +git-tree-sha1 = "7c0ca2c795e5f641f3ef90d57be300cf6ec2b893" uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.1.14" +version = "0.2.1" [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] @@ -297,9 +331,9 @@ version = "0.21.1" [[Juno]] deps = ["Base64", "Logging", "Media", "Profile"] -git-tree-sha1 = "90976c3ab792a98d240d42f9df07420ccfc60668" +git-tree-sha1 = "07cb43290a840908a771552911a6274bc6c072c7" uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" -version = "0.8.3" +version = "0.8.4" [[LLVM]] deps = ["CEnum", "Libdl", "Printf", "Unicode"] @@ -374,10 +408,10 @@ uuid = "e89f7d12-3494-54d1-8411-f7d8b9ae1f27" version = "0.5.0" [[MetaGraphs]] -deps = ["JLD2", "LightGraphs"] -git-tree-sha1 = "c6a4c88304e1ecef6fc372f12d3b8e427e128c1a" +deps = ["JLD2", "LightGraphs", "Random"] +git-tree-sha1 = "df1706b656e11e7bcf5997a51501e40fab84f567" uuid = "626554b9-1ddb-594c-aa3c-2596fe9399a5" -version = "0.6.3" +version = "0.6.6" [[Missings]] deps = ["DataAPI"] @@ -405,9 +439,9 @@ uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" version = "0.3.4" [[OffsetArrays]] -git-tree-sha1 = "b8500f9d73999cfbab4add5136ec26894081581e" +git-tree-sha1 = "9011c7c98769c451f83869a4d66461e2f23bc80b" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.1.3" +version = "1.2.1" [[OpenSpecFun_jll]] deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] @@ -416,9 +450,9 @@ uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.3+3" [[OrderedCollections]] -git-tree-sha1 = "293b70ac1780f9584c89268a6e2a560d938a7065" +git-tree-sha1 = "16c08bf5dba06609fe45e30860092d6fa41fde7b" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.3.0" +version = "1.3.1" [[Parsers]] deps = ["Dates", "Test"] @@ -430,6 +464,12 @@ version = "1.0.10" deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +[[PooledArrays]] +deps = ["DataAPI"] +git-tree-sha1 = "b1333d4eced1826e15adbdf01a4ecaccca9d353c" +uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" +version = "0.5.3" + [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" @@ -460,9 +500,9 @@ version = "0.2.0" [[Requires]] deps = ["UUIDs"] -git-tree-sha1 = "8c08d0c7812169e438a8478dae2a529377ad13f7" +git-tree-sha1 = "2fc2e1ab606a5dca7bbad9036a694553c3a57926" uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.0.2" +version = "1.0.3" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -485,6 +525,12 @@ git-tree-sha1 = "4e1d9dcde9d934fc2e38013d4d49277f09556529" uuid = "b1168b60-8710-48c1-88d2-5c53ae207dd0" version = "0.1.1" +[[SentinelArrays]] +deps = ["Dates", "Random"] +git-tree-sha1 = "7a74946ace3b34fbb6c10e61b6e250b33d7e758c" +uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" +version = "1.2.15" + [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" @@ -539,6 +585,24 @@ git-tree-sha1 = "d72a47c47c522e283db774fc8c459dd5ed773710" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" version = "0.33.1" +[[StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "1ed04f622a39d2e5a6747c3a70be040c00333933" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.1.0" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.0" + +[[Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] +git-tree-sha1 = "b7f762e9820b7fab47544c36f26f54ac59cf8abf" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.0.5" + [[Test]] deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -598,9 +662,9 @@ version = "1.4.5+1" [[Zygote]] deps = ["AbstractFFTs", "ArrayLayouts", "ChainRules", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "Future", "IRTools", "InteractiveUtils", "LinearAlgebra", "LoopVectorization", "MacroTools", "NNlib", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "b0a948a0a78e3e41515714fa1ef4f40a284ffa06" +git-tree-sha1 = "e7b3106f045bd6e526708d1a7821ee9ecc24d094" uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" -version = "0.5.6" +version = "0.5.7" [[ZygoteRules]] deps = ["MacroTools"] diff --git a/Project.toml b/Project.toml index e1f2bfca6..e1c4b3045 100644 --- a/Project.toml +++ b/Project.toml @@ -5,21 +5,16 @@ version = "0.7.0" [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -DataDeps = "124859b0-ceae-595e-8997-d05f6a7a8dfe" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +GraphMLDatasets = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8" GraphSignals = "3ebe565e-a4b5-49c6-aed2-300248c3a9c1" -HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" -IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" -JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MAT = "23992714-dd62-5051-b70f-ba57cb901cac" MetaGraphs = "626554b9-1ddb-594c-aa3c-2596fe9399a5" -PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Requires = "ae029012-a4dd-5104-9daa-d747884805df" ScatterNNlib = "b1168b60-8710-48c1-88d2-5c53ae207dd0" SimpleWeightedGraphs = "47aef6b3-ad0c-573a-a1e2-d07658019622" @@ -31,19 +26,14 @@ ZygoteRules = "700de1a5-db45-46bc-99cf-38207098b444" [compat] CUDA = "1.2" -DataDeps = "0.7" DataStructures = "0.17, 0.18" FillArrays = "0.8 - 0.9" Flux = "0.10 - 0.11" +GraphMLDatasets = "0.1" GraphSignals = "0.1" -HTTP = "0.8" -IRTools = "0.4" -JLD2 = "0.1 - 0.2" -JSON = "0.21" LightGraphs = "1.3" -MAT = "0.8" -MetaGraphs = "< 0.6.4" -PyCall = "1.91" +MetaGraphs = "0.6" +Reexport = "0.2" Requires = "1.0.0" ScatterNNlib = "0.1" SimpleWeightedGraphs = "1.1" diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl index aa1c1c677..ae783458c 100644 --- a/src/GeometricFlux.jl +++ b/src/GeometricFlux.jl @@ -4,7 +4,6 @@ using Statistics: mean using SparseArrays: SparseMatrixCSC using LinearAlgebra: Adjoint, norm -using DataDeps: DataDep, register using FillArrays: Fill using Flux using Flux: glorot_uniform, leakyrelu, GRUCell @@ -19,11 +18,6 @@ using ZygoteRules import Flux: maxpool, meanpool export - datasets, - traindata, - validdata, - testdata, - # layers/gn GraphNet, update_edge, @@ -86,7 +80,7 @@ export const IntOrTuple = Union{Integer,Tuple} -include("datasets/Datasets.jl") +include("datasets.jl") include("pool.jl") diff --git a/src/datasets.jl b/src/datasets.jl new file mode 100644 index 000000000..c64b86add --- /dev/null +++ b/src/datasets.jl @@ -0,0 +1,7 @@ +module Datasets + +using Reexport + +@reexport using GraphMLDatasets + +end diff --git a/src/datasets/Datasets.jl b/src/datasets/Datasets.jl deleted file mode 100644 index 1f4f6b77f..000000000 --- a/src/datasets/Datasets.jl +++ /dev/null @@ -1,41 +0,0 @@ -module Datasets - using DataDeps: DataDep, register, @datadep_str - using HTTP - using JLD2 - using JSON - using LightGraphs: SimpleDiGraph, add_edge! - using MAT - using PyCall - using SparseArrays: SparseMatrixCSC, sparse - - export - Dataset, - Planetoid, - Cora, - PPI, - Reddit, - QM7b, - # Entities, - dataset, - traindata, - validdata, - testdata - - include("./dataset.jl") - include("./planetoid.jl") - include("./cora.jl") - include("./ppi.jl") - include("./reddit.jl") - include("./qm7b.jl") - # include("./entities.jl") - include("./datautils.jl") - - function __init__() - planetoid_init() - cora_init() - ppi_init() - reddit_init() - qm7b_init() - # entities_init() - end -end diff --git a/src/datasets/cora.jl b/src/datasets/cora.jl deleted file mode 100644 index 868c908fb..000000000 --- a/src/datasets/cora.jl +++ /dev/null @@ -1,55 +0,0 @@ -cora_init() = register(DataDep( - "Cora", - """ - The full Cora citation network dataset from the - `"Deep Gaussian Embedding of Graphs: Unsupervised Inductive Learning via - Ranking" `_ paper. - Nodes represent documents and edges represent citation links. - """, - "https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz", - "62e054f93be00a3dedb15b7ac15a2a07168ceab68b40bf95f54d2289d024c6bc"; - post_fetch_method=preprocess_cora, -)) - -function preprocess_cora(local_path) - py""" - import numpy as np - import scipy.sparse as sp - data = np.load($local_path, allow_pickle=True) - A = sp.csr_matrix((data['adj_data'], data['adj_indices'], data['adj_indptr']), shape=data['adj_shape']) - X = sp.csr_matrix((data['attr_data'], data['attr_indices'], data['attr_indptr']), shape=data['attr_shape']) - """ - - graph = SparseMatrixCSC(Array(py"A.toarray()")) - X = SparseMatrixCSC(Array(py"X.toarray()")) - y = py"data['labels']" - - # trainfile = replace(local_path, "cora.npz"=>"cora.train.jld2") - # testfile = replace(local_path, "cora.npz"=>"cora.test.jld2") - # @save trainfile graph train_X train_y - # @save testfile graph test_X test_y - - datafile = replace(local_path, "cora.npz"=>"cora.all.jld2") - @save datafile graph X y -end - -struct Cora <: Dataset -end - -function dataset(cora::Cora) - file = datadep"Cora/cora.all.jld2" - @load file graph X y - graph, X, y -end - -# function traindata(cora::Cora) -# file = datadep"Cora/cora.train.jld2" -# @load file graph train_X train_y -# graph, train_X, train_y -# end - -# function testdata(dataset::Symbol) -# file = datadep"Cora/cora.test.jld2" -# @load file graph test_X test_y -# graph, test_X, test_y -# end diff --git a/src/datasets/dataset.jl b/src/datasets/dataset.jl deleted file mode 100644 index 1b56fb88d..000000000 --- a/src/datasets/dataset.jl +++ /dev/null @@ -1,13 +0,0 @@ -abstract type Dataset end - -# function dataset(::Dataset) -# throw() -# end - -# function traindata(::Dataset) - -# end - -# function testdata(::Dataset) - -# end \ No newline at end of file diff --git a/src/datasets/datautils.jl b/src/datasets/datautils.jl deleted file mode 100644 index 9d406268e..000000000 --- a/src/datasets/datautils.jl +++ /dev/null @@ -1,12 +0,0 @@ -function download_file(url, path) - HTTP.open("GET", url) do http - open(path, "w") do file - write(file, http) - end - end -end - -function unzip(zipfile::String) - f = replace(zipfile, ".zip"=>"") - run(`unzip $f`) -end \ No newline at end of file diff --git a/src/datasets/entities.jl b/src/datasets/entities.jl deleted file mode 100644 index 8e9759e06..000000000 --- a/src/datasets/entities.jl +++ /dev/null @@ -1,13 +0,0 @@ -entities_init() = register(DataDep( - "Cora full datasets", - """ - The relational entities networks "AIFB", "MUTAG", "BGS" and "AM" from - the `"Modeling Relational Data with Graph Convolutional Networks" - `_ paper. - Training and test splits are given by node indices. - """, - "https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/{}.tgz", - ""; - fetch_method=http_download, - post_fetch_method=DataDeps.unpack, -)) diff --git a/src/datasets/planetoid.jl b/src/datasets/planetoid.jl deleted file mode 100644 index 32192ba42..000000000 --- a/src/datasets/planetoid.jl +++ /dev/null @@ -1,90 +0,0 @@ -const PLANETOID_URL = "https://github.com/kimiyoung/planetoid/raw/master/data" -const PLANETOID_DATASETS = [:citeseer, :cora, :pubmed] -const EXTS = ["allx", "ally", "graph", "test.index", "tx", "ty", "x", "y"] -const DATAURLS = [joinpath(PLANETOID_URL, "ind.$(d).$(ext)") for d in PLANETOID_DATASETS, ext in EXTS] - -planetoid_init() = register(DataDep( - "Planetoid", - """ - The citation network datasets "Cora", "CiteSeer", "PubMed" from - "Revisiting Semi-Supervised Learning with Graph Embeddings" - paper. - Nodes represent documents and edges represent citation links. - """, - reshape(DATAURLS, :), - "f52b3d47f5993912d7509b51e8090b6807228c4ba8c7d906f946868005c61c18"; - post_fetch_method=preprocess_planetoid, -)) - -function preprocess_planetoid(local_path) - for dataset in PLANETOID_DATASETS - graph_file = @datadep_str "Planetoid/ind.$(dataset).graph" - trainX_file = @datadep_str "Planetoid/ind.$(dataset).x" - trainy_file = @datadep_str "Planetoid/ind.$(dataset).y" - testX_file = @datadep_str "Planetoid/ind.$(dataset).tx" - testy_file = @datadep_str "Planetoid/ind.$(dataset).ty" - - train_X = read_data(trainX_file) - train_y = read_data(trainy_file) - test_X = read_data(testX_file) - test_y = read_data(testy_file) - graph = read_graph(graph_file) - - trainfile = replace(graph_file, "ind.$(dataset).graph"=>"$(dataset).train.jld2") - testfile = replace(graph_file, "ind.$(dataset).graph"=>"$(dataset).test.jld2") - @save trainfile graph train_X train_y - @save testfile graph test_X test_y - end -end - -function read_data(filename) - py""" - import pickle - from scipy.sparse import csr_matrix - - with open($filename,"rb") as f: - u = pickle._Unpickler(f) - u.encoding = "latin1" - data = u.load() - - if type(data) is csr_matrix: - data = data.toarray() - """ - return SparseMatrixCSC(Array(py"data")) -end - -read_index(filename) = map(x -> parse(Int64, x), readlines(filename)) - -function read_graph(filename) - py""" - import pickle - - with open($filename,"rb") as f: - u = pickle._Unpickler(f) - u.encoding = "latin1" - data = u.load() - """ - return Dict(py"data") -end - -struct Planetoid <: Dataset - dataset::Symbol - - function Planetoid(ds::Symbol) - ds in PLANETOID_DATASETS || throw(error("`dataset` should be one of citeseer, cora, pubmed.")) - new(ds) - end -end - - -function traindata(pla::Planetoid) - file = @datadep_str "Planetoid/$(pla.dataset).train.jld2" - @load file graph train_X train_y - graph, train_X, train_y -end - -function testdata(pla::Planetoid) - file = @datadep_str "Planetoid/$(pla.dataset).test.jld2" - @load file graph test_X test_y - graph, test_X, test_y -end diff --git a/src/datasets/ppi.jl b/src/datasets/ppi.jl deleted file mode 100644 index 458eceff5..000000000 --- a/src/datasets/ppi.jl +++ /dev/null @@ -1,70 +0,0 @@ -ppi_init() = register(DataDep( - "PPI", - """ - The protein-protein interaction networks from the `"Predicting - Multicellular Function through Multi-layer Tissue Networks" - `_ paper, containing positional gene - sets, motif gene sets and immunological signatures as features (50 in - total) and gene ontology sets as labels (121 in total). - """, - "https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/ppi.zip", - "1f5b2b09ac0f897fa6aa1338c64ab75a5473674cbba89380120bede8cddb2a6a"; - post_fetch_method=preprocess_ppi, -)) - -function preprocess_ppi(local_path) - unzip(local_path) - - for phase in ["train", "test", "valid"] - graph_file = @datadep_str "PPI/$(phase)_graph.json" - id_file = @datadep_str "PPI/$(phase)_graph_id.npy" - X_file = @datadep_str "PPI/$(phase)_feats.npy" - y_file = @datadep_str "PPI/$(phase)_labels.npy" - - py""" - import numpy as np - ids = np.load($id_file) - X = np.load($X_file) - y = np.load($y_file) - """ - - X = Matrix{Float32}(py"X") - y = SparseMatrixCSC{Int32,Int64}(Array(py"y")) - ids = Array(py"ids") - graph = read_ppi_graph(graph_file) - - jld2file = replace(local_path, "ppi.zip"=>"ppi.$(phase).jld2") - @save jld2file graph X y ids - end -end - -function read_ppi_graph(filename::String) - d = JSON.Parser.parsefile(filename) - g = SimpleDiGraph{Int32}(length(d["nodes"])) - - for pair in d["links"] - add_edge!(g, pair["source"], pair["target"]) - end - g -end - -struct PPI <: Dataset -end - -function traindata(::PPI) - file = datadep"PPI/ppi.train.jld2" - @load file graph X y ids - graph, X, y, ids -end - -function validdata(::PPI) - file = datadep"PPI/ppi.valid.jld2" - @load file graph X y ids - graph, X, y, ids -end - -function testdata(::PPI) - file = datadep"PPI/ppi.test.jld2" - @load file graph X y ids - graph, X, y, ids -end \ No newline at end of file diff --git a/src/datasets/qm7b.jl b/src/datasets/qm7b.jl deleted file mode 100644 index 57842ae55..000000000 --- a/src/datasets/qm7b.jl +++ /dev/null @@ -1,29 +0,0 @@ -qm7b_init() = register(DataDep( - "QM7b", - """ - The QM7b dataset from the `"MoleculeNet: A Benchmark for Molecular - Machine Learning" `_ paper, consisting of - 7,211 molecules with 14 regression targets. - """, - "http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/qm7b.mat", - "e2a9d670d86eba769fa7b5eadeb592184067d2ec12468b1a220bfc38502dda61"; - post_fetch_method=preprocess_qm7b, -)) - -function preprocess_qm7b(local_path) - vars = matread(local_path) - names = vars["names"] - X = vars["X"] - T = Matrix{Float32}(vars["T"]) - - jld2file = replace(local_path, "qm7b.mat"=>"qm7b.all.jld2") - @save jld2file names X T -end - -struct QM7b <:Dataset end - -function dataset(::QM7b) - file = datadep"QM7b/qm7b.all.jld2" - @load file names X T - names, X, T -end diff --git a/src/datasets/reddit.jl b/src/datasets/reddit.jl deleted file mode 100644 index 9a0a2fbcd..000000000 --- a/src/datasets/reddit.jl +++ /dev/null @@ -1,45 +0,0 @@ -reddit_init() = register(DataDep( - "Reddit", - """ - The Reddit dataset from the `"Inductive Representation Learning on - Large Graphs" `_ paper, containing - Reddit posts belonging to different communities. - """, - "https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/reddit.zip", - "9a16353c28f8ddd07148fc5ac9b57b818d7911ea0fbe9052d66d49fc32b372bf"; - post_fetch_method=preprocess_reddit, -)) - -function preprocess_reddit(local_path) - unzip(local_path) - - graph_file = datadep"Reddit/reddit_graph.npz" - data_file = datadep"Reddit/reddit_data.npz" - - py""" - import numpy as np - import scipy.sparse as sp - graph = np.load($graph_file, allow_pickle=True) - data = np.load($data_file, allow_pickle=True) - """ - - graph = sparse(Vector(py"graph['row']") .+ 1, - Vector(py"graph['col']") .+ 1, - Vector{Int32}(py"graph['data']")) - X = Matrix{Float32}(py"data['feature']") - y = Vector{Int32}(py"data['label']") - ids = Vector{Int32}(py"data['node_ids']") - types = Vector{Int32}(py"data['node_types']") - - jld2file = replace(local_path, "reddit.zip"=>"reddit.all.jld2") - @save jld2file graph X y ids types -end - -struct Reddit <: Dataset -end - -function dataset(::Reddit) - file = datadep"Reddit/reddit.all.jld2" - @load file graph X y ids types - graph, X, y, ids, types -end \ No newline at end of file diff --git a/test/datasets/cora.jl b/test/datasets/cora.jl deleted file mode 100644 index 4d4b72c2b..000000000 --- a/test/datasets/cora.jl +++ /dev/null @@ -1,9 +0,0 @@ -@testset "cora" begin - g, X, y = dataset(Cora()) - @test typeof(g) == SparseMatrixCSC{Float32,Int64} - @test size(g) == (19793, 19793) - @test typeof(X) == SparseMatrixCSC{Float32,Int64} - @test size(X) == (19793, 8710) - @test typeof(y) == Vector{Int64} - @test size(y) == (19793,) -end \ No newline at end of file diff --git a/test/datasets/datasets.jl b/test/datasets/datasets.jl deleted file mode 100644 index 5f60e3f71..000000000 --- a/test/datasets/datasets.jl +++ /dev/null @@ -1,13 +0,0 @@ -tests = [ - "planetoid", - "cora", - "ppi", - "reddit", - "qm7b", -] - -@testset "datasets" begin - for t in tests - include("$(t).jl") - end -end \ No newline at end of file diff --git a/test/datasets/planetoid.jl b/test/datasets/planetoid.jl deleted file mode 100644 index 74210277a..000000000 --- a/test/datasets/planetoid.jl +++ /dev/null @@ -1,15 +0,0 @@ -@testset "planetoid" begin - g, train_X, train_y = GeometricFlux.Datasets.traindata(Planetoid(:cora)) - @test typeof(g) == Dict{Any,Any} - @test typeof(train_X) == SparseMatrixCSC{Float32,Int64} - @test size(train_X) == (140, 1433) - @test typeof(train_y) == SparseMatrixCSC{Int32,Int64} - @test size(train_y) == (140, 7) - - g, test_X, test_y = GeometricFlux.Datasets.testdata(Planetoid(:cora)) - @test typeof(g) == Dict{Any,Any} - @test typeof(test_X) == SparseMatrixCSC{Float32,Int64} - @test size(test_X) == (1000, 1433) - @test typeof(test_y) == SparseMatrixCSC{Int32,Int64} - @test size(test_y) == (1000, 7) -end \ No newline at end of file diff --git a/test/datasets/ppi.jl b/test/datasets/ppi.jl deleted file mode 100644 index cbba485f3..000000000 --- a/test/datasets/ppi.jl +++ /dev/null @@ -1,28 +0,0 @@ -@testset "ppi" begin - g, train_X, train_y, train_ids = traindata(PPI()) - @test typeof(g) == SimpleDiGraph{Int32} - @test nv(g) == 44906 - @test ne(g) == 1271267 - @test typeof(train_X) == Array{Float32,2} - @test size(train_X) == (44906, 50) - @test typeof(train_y) == SparseMatrixCSC{Int32,Int64} - @test size(train_y) == (44906, 121) - - g, valid_X, valid_y, valid_ids = validdata(PPI()) - @test typeof(g) == SimpleDiGraph{Int32} - @test nv(g) == 6514 - @test ne(g) == 205395 - @test typeof(valid_X) == Array{Float32,2} - @test size(valid_X) == (6514, 50) - @test typeof(valid_y) == SparseMatrixCSC{Int32,Int64} - @test size(valid_y) == (6514, 121) - - g, test_X, test_y, test_ids = testdata(PPI()) - @test typeof(g) == SimpleDiGraph{Int32} - @test nv(g) == 5524 - @test ne(g) == 167461 - @test typeof(test_X) == Array{Float32,2} - @test size(test_X) == (5524, 50) - @test typeof(test_y) == SparseMatrixCSC{Int32,Int64} - @test size(test_y) == (5524, 121) -end \ No newline at end of file diff --git a/test/datasets/qm7b.jl b/test/datasets/qm7b.jl deleted file mode 100644 index 490141252..000000000 --- a/test/datasets/qm7b.jl +++ /dev/null @@ -1,9 +0,0 @@ -@testset "qm7b" begin - names, X, T = dataset(QM7b()) - @test typeof(names) == Vector{String} - @test size(names) == (14,) - @test typeof(X) == Array{Float32,3} - @test size(X) == (7211, 23, 23) - @test typeof(T) == Matrix{Float32} - @test size(T) == (7211, 14) -end \ No newline at end of file diff --git a/test/datasets/reddit.jl b/test/datasets/reddit.jl deleted file mode 100644 index 5d461aacc..000000000 --- a/test/datasets/reddit.jl +++ /dev/null @@ -1,13 +0,0 @@ -@testset "reddit" begin - g, X, y, ids, types = dataset(Reddit()) - @test typeof(g) == SparseMatrixCSC{Int32,Int64} - @test size(g) == (232965, 232965) - @test typeof(X) == Matrix{Float32} - @test size(X) == (232965, 602) - @test typeof(y) == Vector{Int32} - @test size(y) == (232965,) - @test typeof(ids) == Vector{Int32} - @test size(ids) == (232965,) - @test typeof(types) == Vector{Int32} - @test size(types) == (232965,) -end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index dc7e63b9e..e23e0fe45 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,7 +19,6 @@ cuda_tests = [ ] tests = [ - "datasets/datasets", "layers/gn", "layers/msgpass", "layers/conv",