@@ -40,7 +40,8 @@ P2PProxy::P2PProxy(TransferEngine* engine, const Options& options)
4040 is_cpu_ (options.is_cpu),
4141 rank_(options.rank),
4242 size_(options.size),
43- cuda_device_index_(options.cuda_device_index) {
43+ cuda_device_index_(options.cuda_device_index),
44+ location_(options.location) {
4445 if (!is_cpu_ && cuda_device_index_ < 0 ) {
4546 int current_device = -1 ;
4647 const cudaError_t get_device_error = cudaGetDevice (¤t_device);
@@ -71,15 +72,15 @@ void P2PProxy::AllocateResources() {
7172 resources_.send_buffer_ = std::malloc (kP2PTotalBufferSize );
7273 TORCH_CHECK (resources_.send_buffer_ != nullptr ,
7374 " Failed to allocate CPU P2P send buffer" );
74- int rc = engine_->registerLocalMemory (
75- resources_. send_buffer_ , kP2PTotalBufferSize , kWildcardLocation );
75+ int rc = engine_->registerLocalMemory (resources_. send_buffer_ ,
76+ kP2PTotalBufferSize , location_ );
7677 TORCH_CHECK (rc == 0 , " Failed to register CPU P2P send buffer" );
7778
7879 resources_.recv_buffer_ = std::malloc (kP2PTotalBufferSize );
7980 TORCH_CHECK (resources_.recv_buffer_ != nullptr ,
8081 " Failed to allocate CPU P2P recv buffer" );
81- rc = engine_->registerLocalMemory (
82- resources_. recv_buffer_ , kP2PTotalBufferSize , kWildcardLocation );
82+ rc = engine_->registerLocalMemory (resources_. recv_buffer_ ,
83+ kP2PTotalBufferSize , location_ );
8384 TORCH_CHECK (rc == 0 , " Failed to register CPU P2P recv buffer" );
8485 } else {
8586 SetCudaDeviceIfNeeded (
@@ -89,15 +90,15 @@ void P2PProxy::AllocateResources() {
8990 cudaMalloc (&resources_.send_buffer_ , kP2PTotalBufferSize );
9091 TORCH_CHECK (err == cudaSuccess,
9192 " Failed to allocate CUDA P2P send buffer" );
92- int rc = engine_->registerLocalMemory (
93- resources_. send_buffer_ , kP2PTotalBufferSize , kWildcardLocation );
93+ int rc = engine_->registerLocalMemory (resources_. send_buffer_ ,
94+ kP2PTotalBufferSize , location_ );
9495 TORCH_CHECK (rc == 0 , " Failed to register CUDA P2P send buffer" );
9596
9697 err = cudaMalloc (&resources_.recv_buffer_ , kP2PTotalBufferSize );
9798 TORCH_CHECK (err == cudaSuccess,
9899 " Failed to allocate CUDA P2P recv buffer" );
99- rc = engine_->registerLocalMemory (
100- resources_. recv_buffer_ , kP2PTotalBufferSize , kWildcardLocation );
100+ rc = engine_->registerLocalMemory (resources_. recv_buffer_ ,
101+ kP2PTotalBufferSize , location_ );
101102 TORCH_CHECK (rc == 0 , " Failed to register CUDA P2P recv buffer" );
102103 }
103104
@@ -152,12 +153,12 @@ void P2PProxy::AllocateResources() {
152153
153154 int rc = engine_->registerLocalMemory (resources_.ctrl_send_region_ ,
154155 kMaxNumRanks * sizeof (P2PControlSlot),
155- kWildcardLocation );
156+ location_ );
156157 TORCH_CHECK (rc == 0 , " Failed to register P2P ctrl send region" );
157158
158159 rc = engine_->registerLocalMemory (resources_.ctrl_recv_region_ ,
159160 kMaxNumRanks * sizeof (P2PControlSlot),
160- kWildcardLocation );
161+ location_ );
161162 TORCH_CHECK (rc == 0 , " Failed to register P2P ctrl recv region" );
162163}
163164
0 commit comments