Skip to content
14 changes: 6 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## [2.7.0] - 2025-10-14

### Fixed

- Fixed `ogbn_train_cugraph` example for distributed cuGraph ([#10439](https://github.com/pyg-team/pytorch_geometric/pull/10439))
- Added `safe_onnx_export` function with workarounds for `onnx_ir.serde.SerdeError` issues in ONNX export ([#10422](https://github.com/pyg-team/pytorch_geometric/pull/10422))
- Fixed importing PyTorch Lightning in `torch_geometric.graphgym` and `torch_geometric.data.lightning` when using `lightning` instead of `pytorch-lightning` ([#10404](https://github.com/pyg-team/pytorch_geometric/pull/10404), [#10417](https://github.com/pyg-team/pytorch_geometric/pull/10417))
- Fixed `detach()` warnings in example scripts involving tensor conversions ([#10357](https://github.com/pyg-team/pytorch_geometric/pull/10357))
- Fixed non-tuple indexing to resolve PyTorch deprecation warning ([#10389](https://github.com/pyg-team/pytorch_geometric/pull/10389))

### Added

- Added LLM-generated explanations to `TAGDataset` ([#9918](https://github.com/pyg-team/pytorch_geometric/pull/9918))
Expand Down Expand Up @@ -91,6 +83,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Fixed

- Fixed broken dataset URLs ([#10415](https://github.com/pyg-team/pytorch_geometric/pull/10415))
- Fixed `ogbn_train_cugraph` example for distributed cuGraph ([#10439](https://github.com/pyg-team/pytorch_geometric/pull/10439))
- Fixed importing PyTorch Lightning in `torch_geometric.graphgym` and `torch_geometric.data.lightning` when using `lightning` instead of `pytorch-lightning` ([#10404](https://github.com/pyg-team/pytorch_geometric/pull/10404), [#10417](https://github.com/pyg-team/pytorch_geometric/pull/10417))
- Added `safe_onnx_export` function with workarounds for `onnx_ir.serde.SerdeError` issues in ONNX export ([#10422](https://github.com/pyg-team/pytorch_geometric/pull/10422))
- Fixed `detach()` warnings in example scripts involving tensor conversions ([#10357](https://github.com/pyg-team/pytorch_geometric/pull/10357))
- Fixed non-tuple indexing to resolve PyTorch deprecation warning ([#10389](https://github.com/pyg-team/pytorch_geometric/pull/10389))
- Fixed conversion to/from `cuGraph` graph objects by ensuring `cudf` column names are correctly specified ([#10343](https://github.com/pyg-team/pytorch_geometric/pull/10343))
- Fixed `_recursive_config()` for `torch.nn.ModuleList` and `torch.nn.ModuleDict` ([#10124](https://github.com/pyg-team/pytorch_geometric/pull/10124), [#10129](https://github.com/pyg-team/pytorch_geometric/pull/10129))
- Fixed the `k_hop_subgraph()` method for directed graphs ([#9756](https://github.com/pyg-team/pytorch_geometric/pull/9756))
Expand Down
11 changes: 11 additions & 0 deletions test/datasets/test_deezer_europe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from torch_geometric.datasets import DeezerEurope
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_deezer_europe():
    """Load `DeezerEurope` and check the shapes of its first graph."""
    graph = DeezerEurope(root='./data/DeezerEurope')[0]

    # Expected sizes of the node feature matrix and the edge index.
    assert graph.x.size() == (28281, 128)
    assert graph.edge_index.size() == (2, 92752)
11 changes: 11 additions & 0 deletions test/datasets/test_facebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from torch_geometric.datasets import FacebookPagePage
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_facebook():
    """Load `FacebookPagePage` and check the shapes of its first graph."""
    graph = FacebookPagePage(root='./data/FacebookPagePage')[0]

    # Expected sizes of the node feature matrix and the edge index.
    assert graph.x.size() == (22470, 128)
    assert graph.edge_index.size() == (2, 171002)
10 changes: 10 additions & 0 deletions test/datasets/test_gemsec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from torch_geometric.datasets import GemsecDeezer
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_gemsec():
    """Load the `'HU'` split of `GemsecDeezer` and check its edge index."""
    graph = GemsecDeezer(root='./data/GemsecDeezer', name='HU')[0]

    # Only the edge index shape is asserted for this dataset.
    assert graph.edge_index.size() == (2, 222887)
11 changes: 11 additions & 0 deletions test/datasets/test_github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from torch_geometric.datasets import GitHub
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_github():
    """Load `GitHub` and check the shapes of its first graph."""
    graph = GitHub(root='./data/GitHub')[0]

    # Expected sizes of the node feature matrix and the edge index.
    assert graph.x.size() == (37700, 128)
    assert graph.edge_index.size() == (2, 289003)
11 changes: 11 additions & 0 deletions test/datasets/test_lastfm_asia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from torch_geometric.datasets import LastFMAsia
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_lastfm_asia():
    """Load `LastFMAsia` and check the shapes of its first graph."""
    graph = LastFMAsia(root='./data/LastFMAsia')[0]

    # Expected sizes of the node feature matrix and the edge index.
    assert graph.x.size() == (7624, 128)
    assert graph.edge_index.size() == (2, 27806)
11 changes: 11 additions & 0 deletions test/datasets/test_twitch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from torch_geometric.datasets import Twitch
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_twitch():
    """Load the `'ES'` split of `Twitch` and check its first graph's shapes."""
    graph = Twitch(root='./data/Twitch', name='ES')[0]

    # Expected sizes of the node feature matrix and the edge index.
    assert graph.x.size() == (4648, 128)
    assert graph.edge_index.size() == (2, 59382)
12 changes: 12 additions & 0 deletions test/datasets/test_wikipedia_network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from torch_geometric.datasets import WikipediaNetwork
from torch_geometric.testing import onlyFullTest, onlyOnline


@onlyOnline
@onlyFullTest
def test_wikipedia_network():
    """Load `WikipediaNetwork` (`'crocodile'`) and check its graph shapes."""
    graph = WikipediaNetwork(
        root='./data/WikipediaNetwork',
        name='crocodile',
    )[0]

    # Expected sizes of the node feature matrix and the edge index.
    assert graph.x.size() == (11631, 128)
    assert graph.edge_index.size() == (2, 180020)
48 changes: 38 additions & 10 deletions torch_geometric/datasets/deezer_europe.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from typing import Callable, Optional
from typing import Callable, List, Optional

import numpy as np
import torch

from torch_geometric.data import Data, InMemoryDataset, download_url
from torch_geometric.data import (
Data,
InMemoryDataset,
download_url,
extract_zip,
)


class DeezerEurope(InMemoryDataset):
Expand All @@ -28,7 +33,7 @@ class DeezerEurope(InMemoryDataset):
(default: :obj:`False`)
"""

url = 'https://graphmining.ai/datasets/ptg/deezer_europe.npz'
url = 'https://snap.stanford.edu/data/deezer_europe.zip'

def __init__(
self,
Expand All @@ -42,21 +47,44 @@ def __init__(
self.load(self.processed_paths[0])

@property
def raw_file_names(self) -> str:
return 'deezer_europe.npz'
def raw_file_names(self) -> List[str]:
return [
f'deezer_europe/{x}' for x in [
'deezer_europe_edges.csv',
'deezer_europe_features.json',
'deezer_europe_target.csv',
]
]

@property
def processed_file_names(self) -> str:
return 'data.pt'

def download(self) -> None:
download_url(self.url, self.raw_dir)
file_path = download_url(self.url, self.raw_dir)
extract_zip(file_path, self.raw_dir)

def process(self) -> None:
data = np.load(self.raw_paths[0], 'r', allow_pickle=True)
x = torch.from_numpy(data['features']).to(torch.float)
y = torch.from_numpy(data['target']).to(torch.long)
edge_index = torch.from_numpy(data['edges']).to(torch.long)
import json

import pandas as pd
edges = pd.read_csv(self.raw_paths[0], dtype=int)
features = json.load(open(self.raw_paths[1]))
target = pd.read_csv(self.raw_paths[2], dtype=int)

xs = []
n_feats = 128
for i in target['id'].values:
f = [0] * n_feats
if str(i) in features:
n_len = len(features[str(i)])
f = features[str(
i)][:n_feats] if n_len >= n_feats else features[str(
i)] + [0] * (n_feats - n_len)
xs.append(f)
x = torch.from_numpy(np.array(xs)).to(torch.float)
y = torch.from_numpy(target.values[:, 1]).t().to(torch.long)
edge_index = torch.from_numpy(edges.values).to(torch.long)
edge_index = edge_index.t().contiguous()

data = Data(x=x, y=y, edge_index=edge_index)
Expand Down
51 changes: 41 additions & 10 deletions torch_geometric/datasets/facebook.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from typing import Callable, Optional
from typing import Callable, List, Optional

import numpy as np
import torch

from torch_geometric.data import Data, InMemoryDataset, download_url
from torch_geometric.data import (
Data,
InMemoryDataset,
download_url,
extract_zip,
)


class FacebookPagePage(InMemoryDataset):
Expand All @@ -27,7 +32,7 @@ class FacebookPagePage(InMemoryDataset):
(default: :obj:`False`)
"""

url = 'https://graphmining.ai/datasets/ptg/facebook.npz'
url = 'https://snap.stanford.edu/data/facebook_large.zip'

def __init__(
self,
Expand All @@ -41,21 +46,47 @@ def __init__(
self.load(self.processed_paths[0])

@property
def raw_file_names(self) -> str:
return 'facebook.npz'
def raw_file_names(self) -> List[str]:
return [
f'facebook_large/{x}' for x in [
'musae_facebook_edges.csv',
'musae_facebook_features.json',
'musae_facebook_target.csv',
]
]

@property
def processed_file_names(self) -> str:
return 'data.pt'

def download(self) -> None:
download_url(self.url, self.raw_dir)
file_path = download_url(self.url, self.raw_dir)
extract_zip(file_path, self.raw_dir)

def process(self) -> None:
data = np.load(self.raw_paths[0], 'r', allow_pickle=True)
x = torch.from_numpy(data['features']).to(torch.float)
y = torch.from_numpy(data['target']).to(torch.long)
edge_index = torch.from_numpy(data['edges']).to(torch.long)
import json

import pandas as pd
edges = pd.read_csv(self.raw_paths[0], dtype=int)
features = json.load(open(self.raw_paths[1]))
target = pd.read_csv(self.raw_paths[2])
page_type = sorted(target['page_type'].value_counts().index.tolist())
target['y'] = target['page_type'].apply(lambda x: page_type.index(x))

# import pdb; pdb.set_trace() # noqa: T201
xs = []
n_feats = 128
for i in target['id'].values:
f = [0] * n_feats
if str(i) in features:
n_len = len(features[str(i)])
f = features[str(
i)][:n_feats] if n_len >= n_feats else features[str(
i)] + [0] * (n_feats - n_len)
xs.append(f)
x = torch.from_numpy(np.array(xs)).to(torch.float)
y = torch.from_numpy(target['y'].values).t().to(torch.long)
edge_index = torch.from_numpy(edges.values).to(torch.long)
edge_index = edge_index.t().contiguous()

data = Data(x=x, y=y, edge_index=edge_index)
Expand Down
34 changes: 23 additions & 11 deletions torch_geometric/datasets/gemsec.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os.path as osp
from typing import Callable, Optional
from typing import Callable, List, Optional

import numpy as np
import torch

from torch_geometric.data import Data, InMemoryDataset, download_url
from torch_geometric.data import (
Data,
InMemoryDataset,
download_url,
extract_tar,
)


class GemsecDeezer(InMemoryDataset):
Expand All @@ -31,7 +35,7 @@ class GemsecDeezer(InMemoryDataset):
(default: :obj:`False`)
"""

url = 'https://graphmining.ai/datasets/ptg/gemsec'
url = 'https://snap.stanford.edu/data/gemsec_deezer_dataset.tar.gz'

def __init__(
self,
Expand All @@ -56,23 +60,31 @@ def processed_dir(self) -> str:
return osp.join(self.root, self.name, 'processed')

@property
def raw_file_names(self) -> str:
return f'{self.name}.npz'
def raw_file_names(self) -> List[str]:
return [
f'deezer_clean_data/{x}' for x in [
f'{self.name}_edges.csv',
f'{self.name}_genres.json',
]
]

@property
def processed_file_names(self) -> str:
return 'data.pt'

def download(self) -> None:
download_url(osp.join(self.url, self.name + '.npz'), self.raw_dir)
file_path = download_url(self.url, self.raw_dir)
extract_tar(file_path, self.raw_dir)

def process(self) -> None:
data = np.load(self.raw_paths[0], 'r', allow_pickle=True)
y = torch.from_numpy(data['target']).to(torch.long)
edge_index = torch.from_numpy(data['edges']).to(torch.long)

import pandas as pd
edges = pd.read_csv(self.raw_paths[0], dtype=int)

edge_index = torch.from_numpy(edges.values).to(torch.long)
edge_index = edge_index.t().contiguous()

data = Data(y=y, edge_index=edge_index)
data = Data(edge_index=edge_index)

if self.pre_transform is not None:
data = self.pre_transform(data)
Expand Down
Loading
Loading