diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ac45e0b0de5..03935cd54e25 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Added support for device conversions of `InMemoryDataset` ([#8402](https://github.com/pyg-team/pytorch_geometric/pull/8402))
 - Added support for edge-level temporal sampling in `NeighborLoader` and `LinkNeighborLoader` ([#8372] (https://github.com/pyg-team/pytorch_geometric/pull/8372))
 - Added support for `torch.compile` in `ModuleDict` and `ParameterDict` ([#8363](https://github.com/pyg-team/pytorch_geometric/pull/8363))
 - Added `force_reload` option to `Dataset` and `InMemoryDataset` to reload datasets ([#8352](https://github.com/pyg-team/pytorch_geometric/pull/8352), [#8357](https://github.com/pyg-team/pytorch_geometric/pull/8357))
diff --git a/test/nn/conv/test_hetero_conv.py b/test/nn/conv/test_hetero_conv.py
index b0c03656aa87..04def1f8fb37 100644
--- a/test/nn/conv/test_hetero_conv.py
+++ b/test/nn/conv/test_hetero_conv.py
@@ -1,8 +1,11 @@
+import random
+
 import pytest
 import torch
 
 import torch_geometric
 from torch_geometric.data import HeteroData
+from torch_geometric.datasets import FakeHeteroDataset
 from torch_geometric.nn import (
     GATConv,
     GCN2Conv,
@@ -12,6 +15,7 @@
     MessagePassing,
     SAGEConv,
 )
+from torch_geometric.profile import benchmark
 from torch_geometric.testing import (
     disableExtensions,
     get_random_edge_index,
@@ -205,3 +209,64 @@ def test_compile_hetero_conv_graph_breaks(device):
     assert len(out) == len(expected)
     for key in expected.keys():
         assert torch.allclose(out[key], expected[key], atol=1e-6)
+
+
+if __name__ == '__main__':
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cuda')
+    parser.add_argument('--backward', action='store_true')
+    args = parser.parse_args()
+
+    dataset = FakeHeteroDataset(num_graphs=10).to(args.device)
+
+    def gen_args():
+        data = dataset[random.randrange(len(dataset))]
+        return data.x_dict, data.edge_index_dict
+
+    class HeteroGNN(torch.nn.Module):
+        def __init__(self, channels: int = 32, num_layers: int = 2):
+            super().__init__()
+            self.convs = torch.nn.ModuleList()
+
+            conv = HeteroConv({
+                edge_type:
+                SAGEConv(
+                    in_channels=(
+                        dataset.num_features[edge_type[0]],
+                        dataset.num_features[edge_type[-1]],
+                    ),
+                    out_channels=channels,
+                )
+                for edge_type in dataset[0].edge_types
+            })
+            self.convs.append(conv)
+
+            for _ in range(num_layers - 1):
+                conv = HeteroConv({
+                    edge_type:
+                    SAGEConv((channels, channels), channels)
+                    for edge_type in dataset[0].edge_types
+                })
+                self.convs.append(conv)
+
+            self.lin = Linear(channels, 1)
+
+        def forward(self, x_dict, edge_index_dict):
+            for conv in self.convs:
+                x_dict = conv(x_dict, edge_index_dict)
+                x_dict = {key: x.relu() for key, x in x_dict.items()}
+            return self.lin(x_dict['v0'])
+
+    model = HeteroGNN().to(args.device)
+    compiled_model = torch_geometric.compile(model)
+
+    benchmark(
+        funcs=[model, compiled_model],
+        func_names=['Vanilla', 'Compiled'],
+        args=gen_args,
+        num_steps=50 if args.device == 'cpu' else 500,
+        num_warmups=10 if args.device == 'cpu' else 100,
+        backward=args.backward,
+    )
diff --git a/torch_geometric/data/in_memory_dataset.py b/torch_geometric/data/in_memory_dataset.py
index 62b22bf85c43..34bc2f5c1dde 100644
--- a/torch_geometric/data/in_memory_dataset.py
+++ b/torch_geometric/data/in_memory_dataset.py
@@ -307,6 +307,50 @@ def __getattr__(self, key: str) -> Any:
         raise AttributeError(f"'{self.__class__.__name__}' object has no "
                              f"attribute '{key}'")
 
+    def to(self, device: Union[int, str]) -> 'InMemoryDataset':
+        r"""Performs device conversion of the whole dataset.
+
+        Args:
+            device (int or str): The target device, forwarded as-is to the
+                :meth:`to` method of the underlying data object.
+
+        Raises:
+            ValueError: If the dataset only references a subset of examples
+                or if its data list is already cached.
+        """
+        if self._indices is not None:
+            raise ValueError("The given 'InMemoryDataset' only references a "
+                             "subset of examples of the full dataset")
+        if self._data_list is not None:
+            raise ValueError("The data of the dataset is already cached")
+        self._data.to(device)
+        return self
+
+    def cpu(self, *args: str) -> 'InMemoryDataset':
+        r"""Moves the dataset to CPU memory.
+
+        Any positional :obj:`*args` are accepted but ignored.
+        """
+        return self.to(torch.device('cpu'))
+
+    def cuda(
+        self,
+        device: Optional[Union[int, str]] = None,
+    ) -> 'InMemoryDataset':
+        r"""Moves the dataset to CUDA memory.
+
+        Args:
+            device (int or str, optional): The CUDA device. An integer is
+                expanded to :obj:`"cuda:{device}"`, and :obj:`None` selects
+                :obj:`"cuda"`. (default: :obj:`None`)
+        """
+        if isinstance(device, int):
+            # Interpolate the device index, not the builtin `int` type.
+            device = f'cuda:{device}'
+        elif device is None:
+            device = 'cuda'
+        return self.to(device)
+
 
 def nested_iter(node: Union[Mapping, Sequence]) -> Iterable:
     if isinstance(node, Mapping):