From 231529fd63603d53b804db85d6778ef76a9552b4 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 17:04:40 +0000 Subject: [PATCH 1/9] Encapsulate and standardize deform_conv2d (#3074) * Rename files. * Standardizing method names. * Adding anonymous namespaces. * Applying C++ naming rules and alinging variable names across headers and cpp files. * Syncing names across implementations. * Rename deform_conv2d.h to deform_conv2d.cpp * Use header files: - Create header files for kernel implementation and remove definitions from vision_*.h files. - Eliminate unnecessary headers and ensure all cpp include their headers. * Change the naming convention for kernel implementations. * Remove the _param postfix from the variables and standardizing names. * Exposing public forward/backward methods to the C++ API and moving methods around to minimize git blame changes. --- torchvision/csrc/autocast.h | 2 + ...mConv_cpu.cpp => deform_conv2d_kernel.cpp} | 568 ++++++++--------- torchvision/csrc/cpu/deform_conv2d_kernel.h | 39 ++ torchvision/csrc/cpu/vision_cpu.h | 35 +- ...rmConv_cuda.cu => deform_conv2d_kernel.cu} | 571 +++++++++--------- torchvision/csrc/cuda/deform_conv2d_kernel.h | 39 ++ torchvision/csrc/cuda/vision_cuda.h | 35 +- .../csrc/{DeformConv.h => deform_conv2d.cpp} | 26 +- torchvision/csrc/deform_conv2d.h | 100 +++ torchvision/csrc/vision.cpp | 16 +- 10 files changed, 780 insertions(+), 651 deletions(-) rename torchvision/csrc/cpu/{DeformConv_cpu.cpp => deform_conv2d_kernel.cpp} (84%) create mode 100644 torchvision/csrc/cpu/deform_conv2d_kernel.h rename torchvision/csrc/cuda/{DeformConv_cuda.cu => deform_conv2d_kernel.cu} (85%) create mode 100644 torchvision/csrc/cuda/deform_conv2d_kernel.h rename torchvision/csrc/{DeformConv.h => deform_conv2d.cpp} (96%) create mode 100644 torchvision/csrc/deform_conv2d.h diff --git a/torchvision/csrc/autocast.h b/torchvision/csrc/autocast.h index 1f954464b72..584ef13f389 100644 --- a/torchvision/csrc/autocast.h +++ b/torchvision/csrc/autocast.h @@ -1,5 +1,7 @@ #pragma once +// TODO: Delete this file once none of the methods use it + #if defined(WITH_CUDA) || defined(WITH_HIP) #include #endif diff --git a/torchvision/csrc/cpu/DeformConv_cpu.cpp b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp similarity index 84% rename from torchvision/csrc/cpu/DeformConv_cpu.cpp rename to torchvision/csrc/cpu/deform_conv2d_kernel.cpp index 0212be55aa4..f593e880b3b 100644 --- a/torchvision/csrc/cpu/DeformConv_cpu.cpp +++ b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp @@ -66,18 +66,14 @@ // modified from // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp -#include -#include -#include +#include "deform_conv2d_kernel.h" -#include -#include -#include +namespace { const int kMaxParallelImgs = 32; template -static scalar_t bilinear_interpolate( +scalar_t bilinear_interpolate( const scalar_t* in, int height, int width, @@ -116,7 +112,7 @@ static scalar_t bilinear_interpolate( } template -static void deformable_im2col_kernel( +void deformable_im2col_kernel( int n, const scalar_t* input, const scalar_t* offset, @@ -129,8 +125,8 @@ static void deformable_im2col_kernel( int pad_w, int stride_h, int stride_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int batch_sz, int n_in_channels, int n_offset_grps, @@ -180,8 +176,10 @@ static void deformable_im2col_kernel( offset_ptr[offset_idx * (out_h * out_w) + out_y * out_w + out_x]; const scalar_t offset_w = offset_ptr [(offset_idx + 1) * 
(out_h * out_w) + out_y * out_w + out_x]; - const scalar_t y = (out_y * stride_h - pad_h) + i * dil_h + offset_h; - const scalar_t x = (out_x * stride_w - pad_w) + j * dil_w + offset_w; + const scalar_t y = + (out_y * stride_h - pad_h) + i * dilation_h + offset_h; + const scalar_t x = + (out_x * stride_w - pad_w) + j * dilation_w + offset_w; *columns_ptr = mask_value * bilinear_interpolate(input_ptr, height, width, y, x); columns_ptr += batch_sz * out_h * out_w; @@ -190,7 +188,7 @@ static void deformable_im2col_kernel( } } -static void deformable_im2col( +void deformable_im2col( const at::Tensor& input, const at::Tensor& data_offset, const at::Tensor& data_mask, @@ -203,8 +201,8 @@ static void deformable_im2col( int pad_w, int stride_h, int stride_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int out_h, int out_w, int parallel_imgs, @@ -228,8 +226,8 @@ static void deformable_im2col( pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, parallel_imgs, n_in_channels, deformable_group, @@ -240,7 +238,7 @@ static void deformable_im2col( })); } -static int get_greatest_divisor_below_bound(int n, int bound) { +int get_greatest_divisor_below_bound(int n, int bound) { for (int k = bound; k > 1; --k) { if (n % k == 0) { return k; @@ -249,216 +247,8 @@ static int get_greatest_divisor_below_bound(int n, int bound) { return 1; } -at::Tensor DeformConv2d_forward_cpu( - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dil_h, - int64_t dil_w, - int64_t n_weight_grps, - int64_t n_offset_grps, - bool use_mask) { - at::Tensor input = input_param.contiguous(); - at::Tensor offset = offset_param.contiguous(); - at::Tensor weight = weight_param.contiguous(); - at::Tensor mask = mask_param.contiguous(); - at::Tensor bias = bias_param.contiguous(); - - TORCH_CHECK(input.ndimension() == 4); - TORCH_CHECK(offset.ndimension() == 4); - TORCH_CHECK(!use_mask || mask.ndimension() == 4); - TORCH_CHECK(weight.ndimension() == 4); - TORCH_CHECK(input.device().is_cpu(), "input must be a CPU tensor"); - - int batch_sz = input.size(0); - int n_in_channels = input.size(1); - int in_h = input.size(2); - int in_w = input.size(3); - - int n_parallel_imgs = - get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); - - // Unpack shapes and args - int out_channels = weight.size(0); - int weight_h = weight.size(2); - int weight_w = weight.size(3); - - int ker_h = dil_h * (weight_h - 1) + 1; - int ker_w = dil_w * (weight_w - 1) + 1; - int out_h = ((in_h + 2 * pad_h - ker_h) / stride_h) + 1; - int out_w = ((in_w + 2 * pad_w - ker_w) / stride_w) + 1; - - TORCH_CHECK( - weight_h > 0 && weight_w > 0, - "weight_h: ", - weight_h, - " weight_w: ", - weight_w); - TORCH_CHECK( - stride_h > 0 && stride_w > 0, - "stride_h: ", - stride_h, - " stride_w: ", - stride_w); - TORCH_CHECK(pad_h >= 0 && pad_w >= 0, "pad_h: ", pad_h, " pad_w: ", pad_w); - TORCH_CHECK(dil_h > 0 && dil_w > 0, "dil_h: ", dil_h, " dil_w: ", dil_w); - - TORCH_CHECK(weight.size(1) * n_weight_grps == input.size(1)); - TORCH_CHECK(weight.size(0) % n_weight_grps == 0); - TORCH_CHECK( - (offset.size(1) == n_offset_grps * 2 * weight_h * weight_w), - "offset.shape[1] is not valid: got: ", - offset.size(1), - " expected: ", - n_offset_grps * 2 * weight_h * weight_w); - TORCH_CHECK( - (!use_mask || mask.size(1) == n_offset_grps * weight_h * 
weight_w), - "mask.shape[1] is not valid: got: ", - mask.size(1), - " expected: ", - n_offset_grps * weight_h * weight_w); - TORCH_CHECK(input.size(1) % n_offset_grps == 0); - - TORCH_CHECK( - (offset.size(0) == input.size(0)), "invalid batch size of offset"); - TORCH_CHECK( - (offset.size(2) == out_h && offset.size(3) == out_w), - "offset output dims: (", - offset.size(2), - ", ", - offset.size(3), - ") - ", - "computed output dims: (", - out_h, - ", ", - out_w, - ")"); - TORCH_CHECK((mask.size(0) == input.size(0)), "invalid batch size of mask"); - TORCH_CHECK( - (!use_mask || (mask.size(2) == out_h && mask.size(3) == out_w)), - "offset output dims: (", - mask.size(2), - ", ", - mask.size(3), - ") - ", - "computed output dims: (", - out_h, - ", ", - out_w, - ")"); - TORCH_CHECK( - out_h > 0 && out_w > 0, - "Calculated output size too small - out_h: ", - out_h, - " out_w: ", - out_w); - - auto out = at::zeros({batch_sz, out_channels, out_h, out_w}, input.options()); - if (batch_sz == 0) { - return out; - } - - // Separate batches into blocks - out = out.view({batch_sz / n_parallel_imgs, - n_parallel_imgs, - out_channels, - out_h, - out_w}); - input = input.view( - {batch_sz / n_parallel_imgs, n_parallel_imgs, n_in_channels, in_h, in_w}); - - offset = offset.view({batch_sz / n_parallel_imgs, - n_parallel_imgs, - n_offset_grps * 2 * weight_h * weight_w, - out_h, - out_w}); - - if (use_mask) { - mask = mask.view({batch_sz / n_parallel_imgs, - n_parallel_imgs, - n_offset_grps * weight_h * weight_w, - out_h, - out_w}); - } - - at::Tensor out_buf = at::zeros( - {batch_sz / n_parallel_imgs, - out_channels, - n_parallel_imgs * out_h, - out_w}, - out.options()); - - // Separate channels into convolution groups - out_buf = out_buf.view({out_buf.size(0), - n_weight_grps, - out_buf.size(1) / n_weight_grps, - out_buf.size(2), - out_buf.size(3)}); - weight = weight.view({n_weight_grps, - weight.size(0) / n_weight_grps, - weight.size(1), - weight.size(2), - weight.size(3)}); - - // Sample points and perform convolution - auto columns = at::zeros( - {n_in_channels * weight_h * weight_w, n_parallel_imgs * out_h * out_w}, - input.options()); - for (int b = 0; b < batch_sz / n_parallel_imgs; b++) { - deformable_im2col( - input[b], - offset[b], - mask[b], - n_in_channels, - in_h, - in_w, - weight_h, - weight_w, - pad_h, - pad_w, - stride_h, - stride_w, - dil_h, - dil_w, - out_h, - out_w, - n_parallel_imgs, - n_offset_grps, - use_mask, - columns); - - columns = columns.view( - {n_weight_grps, columns.size(0) / n_weight_grps, columns.size(1)}); - for (int g = 0; g < n_weight_grps; g++) { - out_buf[b][g] = out_buf[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(out_buf[b][g]); - } - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - out_buf = out_buf.view({batch_sz / n_parallel_imgs, - out_channels, - n_parallel_imgs, - out_h, - out_w}); - out_buf.transpose_(1, 2); - out.copy_(out_buf); - out = out.view({batch_sz, out_channels, out_h, out_w}); - - return out + bias.view({1, out_channels, 1, 1}); -} - template -static void deformable_col2im_kernel( +void deformable_col2im_kernel( int n, const scalar_t* col, const scalar_t* offset, @@ -533,7 +323,7 @@ static void deformable_col2im_kernel( } } -static void compute_grad_input( +void compute_grad_input( const at::Tensor& columns, const at::Tensor& offset, const at::Tensor& mask, @@ -587,7 +377,7 @@ static void compute_grad_input( } template -static scalar_t get_coordinate_weight( +scalar_t 
get_coordinate_weight( const scalar_t* im_data, int height, int width, @@ -620,7 +410,7 @@ static scalar_t get_coordinate_weight( } template -static void deformable_col2im_coord_kernel( +void deformable_col2im_coord_kernel( int n, const scalar_t* col, const scalar_t* im, @@ -732,7 +522,7 @@ static void deformable_col2im_coord_kernel( } } -static void compute_grad_offset_and_mask( +void compute_grad_offset_and_mask( const at::Tensor& columns, const at::Tensor& input, const at::Tensor& offset, @@ -790,8 +580,7 @@ static void compute_grad_offset_and_mask( })); } -static std::tuple -deform_conv2d_backward_input_cpu( +std::tuple backward_gradient_inputs( at::Tensor input, at::Tensor weight, at::Tensor offset, @@ -801,8 +590,8 @@ deform_conv2d_backward_input_cpu( int stride_w, int pad_h, int pad_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int n_weight_grps, int n_offset_grps, int n_parallel_imgs, @@ -818,8 +607,10 @@ deform_conv2d_backward_input_cpu( int weight_h = weight.size(2); int weight_w = weight.size(3); - long out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) / stride_h + 1; - long out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) / stride_w + 1; + long out_h = + (in_h + 2 * pad_h - (dilation_h * (weight_h - 1) + 1)) / stride_h + 1; + long out_w = + (in_w + 2 * pad_w - (dilation_w * (weight_w - 1) + 1)) / stride_w + 1; auto grad_input = at::zeros_like(input); auto grad_offset = at::zeros_like(offset); @@ -903,8 +694,8 @@ deform_conv2d_backward_input_cpu( pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_parallel_imgs, n_offset_grps, use_mask, @@ -924,8 +715,8 @@ deform_conv2d_backward_input_cpu( pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_parallel_imgs, n_offset_grps, use_mask, @@ -944,7 +735,7 @@ deform_conv2d_backward_input_cpu( return std::make_tuple(grad_input, grad_offset, grad_mask); } -static at::Tensor deform_conv2d_backward_parameters_cpu( +at::Tensor backward_gradient_parameters( at::Tensor input, const at::Tensor& weight, at::Tensor offset, @@ -954,8 +745,8 @@ static at::Tensor deform_conv2d_backward_parameters_cpu( int stride_w, int pad_h, int pad_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int n_weight_grps, int n_offset_grps, int n_parallel_imgs, @@ -1032,8 +823,8 @@ static at::Tensor deform_conv2d_backward_parameters_cpu( pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, out_h, out_w, n_parallel_imgs, @@ -1058,46 +849,263 @@ static at::Tensor deform_conv2d_backward_parameters_cpu( return grad_weight; } +} // namespace + +at::Tensor deform_conv2d_forward_cpu( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask) { + at::Tensor input_c = input.contiguous(); + at::Tensor offset_c = offset.contiguous(); + at::Tensor weight_c = weight.contiguous(); + at::Tensor mask_c = mask.contiguous(); + at::Tensor bias_c = bias.contiguous(); + + TORCH_CHECK(input_c.ndimension() == 4); + TORCH_CHECK(offset_c.ndimension() == 4); + TORCH_CHECK(!use_mask || mask_c.ndimension() == 4); + TORCH_CHECK(weight_c.ndimension() == 4); + TORCH_CHECK(input_c.device().is_cpu(), "input must be a CPU tensor"); + + int batch_sz = input_c.size(0); + int n_in_channels = input_c.size(1); + int in_h = input_c.size(2); 
+ int in_w = input_c.size(3); + + int n_parallel_imgs = + get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); + + // Unpack shapes and args + int out_channels = weight_c.size(0); + int weight_h = weight_c.size(2); + int weight_w = weight_c.size(3); + + int ker_h = dilation_h * (weight_h - 1) + 1; + int ker_w = dilation_w * (weight_w - 1) + 1; + int out_h = ((in_h + 2 * pad_h - ker_h) / stride_h) + 1; + int out_w = ((in_w + 2 * pad_w - ker_w) / stride_w) + 1; + + TORCH_CHECK( + weight_h > 0 && weight_w > 0, + "weight_h: ", + weight_h, + " weight_w: ", + weight_w); + TORCH_CHECK( + stride_h > 0 && stride_w > 0, + "stride_h: ", + stride_h, + " stride_w: ", + stride_w); + TORCH_CHECK(pad_h >= 0 && pad_w >= 0, "pad_h: ", pad_h, " pad_w: ", pad_w); + TORCH_CHECK( + dilation_h > 0 && dilation_w > 0, + "dilation_h: ", + dilation_h, + " dilation_w: ", + dilation_w); + + TORCH_CHECK(weight_c.size(1) * n_weight_grps == input_c.size(1)); + TORCH_CHECK(weight_c.size(0) % n_weight_grps == 0); + TORCH_CHECK( + (offset_c.size(1) == n_offset_grps * 2 * weight_h * weight_w), + "offset.shape[1] is not valid: got: ", + offset_c.size(1), + " expected: ", + n_offset_grps * 2 * weight_h * weight_w); + TORCH_CHECK( + (!use_mask || mask_c.size(1) == n_offset_grps * weight_h * weight_w), + "mask.shape[1] is not valid: got: ", + mask_c.size(1), + " expected: ", + n_offset_grps * weight_h * weight_w); + TORCH_CHECK(input_c.size(1) % n_offset_grps == 0); + + TORCH_CHECK( + (offset_c.size(0) == input_c.size(0)), "invalid batch size of offset"); + TORCH_CHECK( + (offset_c.size(2) == out_h && offset_c.size(3) == out_w), + "offset output dims: (", + offset_c.size(2), + ", ", + offset_c.size(3), + ") - ", + "computed output dims: (", + out_h, + ", ", + out_w, + ")"); + TORCH_CHECK( + (mask_c.size(0) == input_c.size(0)), "invalid batch size of mask"); + TORCH_CHECK( + (!use_mask || (mask_c.size(2) == out_h && mask_c.size(3) == out_w)), + "offset output dims: (", + mask_c.size(2), + ", ", + mask_c.size(3), + ") - ", + "computed output dims: (", + out_h, + ", ", + out_w, + ")"); + TORCH_CHECK( + out_h > 0 && out_w > 0, + "Calculated output size too small - out_h: ", + out_h, + " out_w: ", + out_w); + + auto out = + at::zeros({batch_sz, out_channels, out_h, out_w}, input_c.options()); + if (batch_sz == 0) { + return out; + } + + // Separate batches into blocks + out = out.view({batch_sz / n_parallel_imgs, + n_parallel_imgs, + out_channels, + out_h, + out_w}); + input_c = input_c.view( + {batch_sz / n_parallel_imgs, n_parallel_imgs, n_in_channels, in_h, in_w}); + + offset_c = offset_c.view({batch_sz / n_parallel_imgs, + n_parallel_imgs, + n_offset_grps * 2 * weight_h * weight_w, + out_h, + out_w}); + + if (use_mask) { + mask_c = mask_c.view({batch_sz / n_parallel_imgs, + n_parallel_imgs, + n_offset_grps * weight_h * weight_w, + out_h, + out_w}); + } + + at::Tensor out_buf = at::zeros( + {batch_sz / n_parallel_imgs, + out_channels, + n_parallel_imgs * out_h, + out_w}, + out.options()); + + // Separate channels into convolution groups + out_buf = out_buf.view({out_buf.size(0), + n_weight_grps, + out_buf.size(1) / n_weight_grps, + out_buf.size(2), + out_buf.size(3)}); + weight_c = weight_c.view({n_weight_grps, + weight_c.size(0) / n_weight_grps, + weight_c.size(1), + weight_c.size(2), + weight_c.size(3)}); + + // Sample points and perform convolution + auto columns = at::zeros( + {n_in_channels * weight_h * weight_w, n_parallel_imgs * out_h * out_w}, + input_c.options()); + for (int b = 0; b < batch_sz / n_parallel_imgs; 
b++) { + deformable_im2col( + input_c[b], + offset_c[b], + mask_c[b], + n_in_channels, + in_h, + in_w, + weight_h, + weight_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + out_h, + out_w, + n_parallel_imgs, + n_offset_grps, + use_mask, + columns); + + columns = columns.view( + {n_weight_grps, columns.size(0) / n_weight_grps, columns.size(1)}); + for (int g = 0; g < n_weight_grps; g++) { + out_buf[b][g] = out_buf[b][g] + .flatten(1) + .addmm_(weight_c[g].flatten(1), columns[g]) + .view_as(out_buf[b][g]); + } + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + out_buf = out_buf.view({batch_sz / n_parallel_imgs, + out_channels, + n_parallel_imgs, + out_h, + out_w}); + out_buf.transpose_(1, 2); + out.copy_(out_buf); + out = out.view({batch_sz, out_channels, out_h, out_w}); + + return out + bias_c.view({1, out_channels, 1, 1}); +} + std::tuple -DeformConv2d_backward_cpu( - const at::Tensor& grad_out_param, - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, +deform_conv2d_backward_cpu( + const at::Tensor& grad_out, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, int64_t stride_h, int64_t stride_w, int64_t pad_h, int64_t pad_w, - int64_t dil_h, - int64_t dil_w, + int64_t dilation_h, + int64_t dilation_w, int64_t n_weight_grps, int64_t n_offset_grps, bool use_mask) { - at::Tensor grad_out = grad_out_param.contiguous(); - at::Tensor input = input_param.contiguous(); - at::Tensor weight = weight_param.contiguous(); - at::Tensor offset = offset_param.contiguous(); - at::Tensor mask = mask_param.contiguous(); - at::Tensor bias = bias_param.contiguous(); - - const int batch_sz = input.size(0); + at::Tensor grad_out_c = grad_out.contiguous(); + at::Tensor input_c = input.contiguous(); + at::Tensor weight_c = weight.contiguous(); + at::Tensor offset_c = offset.contiguous(); + at::Tensor mask_c = mask.contiguous(); + at::Tensor bias_c = bias.contiguous(); + + const int batch_sz = input_c.size(0); const int n_parallel_imgs = get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); - auto grad_input_and_offset_and_mask = deform_conv2d_backward_input_cpu( - input, - weight, - offset, - mask, - grad_out, + auto grad_input_and_offset_and_mask = backward_gradient_inputs( + input_c, + weight_c, + offset_c, + mask_c, + grad_out_c, stride_h, stride_w, pad_h, pad_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_weight_grps, n_offset_grps, n_parallel_imgs, @@ -1107,24 +1115,24 @@ DeformConv2d_backward_cpu( auto grad_offset = std::get<1>(grad_input_and_offset_and_mask); auto grad_mask = std::get<2>(grad_input_and_offset_and_mask); - auto grad_weight = deform_conv2d_backward_parameters_cpu( - input, - weight, - offset, - mask, - grad_out, + auto grad_weight = backward_gradient_parameters( + input_c, + weight_c, + offset_c, + mask_c, + grad_out_c, stride_h, stride_w, pad_h, pad_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_weight_grps, n_offset_grps, n_parallel_imgs, use_mask); - auto grad_bias = at::ones_like(bias) * grad_out.sum({0, 2, 3}); + auto grad_bias = at::ones_like(bias_c) * grad_out_c.sum({0, 2, 3}); return std::make_tuple( grad_input, grad_weight, grad_offset, grad_mask, grad_bias); diff --git a/torchvision/csrc/cpu/deform_conv2d_kernel.h b/torchvision/csrc/cpu/deform_conv2d_kernel.h new file mode 100644 index 00000000000..2eb5ab37c6e 
--- /dev/null +++ b/torchvision/csrc/cpu/deform_conv2d_kernel.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor deform_conv2d_forward_cpu( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); + +VISION_API std:: + tuple + deform_conv2d_backward_cpu( + const at::Tensor& grad_out, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index d5bfcc0de24..6f85d9c0256 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -2,40 +2,7 @@ #include #include "../macros.h" -VISION_API at::Tensor DeformConv2d_forward_cpu( - const at::Tensor& input, - const at::Tensor& weight, - const at::Tensor& offset, - const at::Tensor& mask, - const at::Tensor& bias, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dilation_h, - int64_t dilation_w, - int64_t groups, - int64_t deformable_groups, - bool use_mask); - -VISION_API std:: - tuple - DeformConv2d_backward_cpu( - const at::Tensor& grad_out, - const at::Tensor& input, - const at::Tensor& weight, - const at::Tensor& offset, - const at::Tensor& mask, - const at::Tensor& bias, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dilation_h, - int64_t dilation_w, - int64_t groups, - int64_t deformable_groups, - bool use_mask); +// TODO: Delete this file once all the methods are gone VISION_API at::Tensor nms_cpu( const at::Tensor& dets, diff --git a/torchvision/csrc/cuda/DeformConv_cuda.cu b/torchvision/csrc/cuda/deform_conv2d_kernel.cu similarity index 85% rename from torchvision/csrc/cuda/DeformConv_cuda.cu rename to torchvision/csrc/cuda/deform_conv2d_kernel.cu index 507532e7184..6edaa9c73af 100644 --- a/torchvision/csrc/cuda/DeformConv_cuda.cu +++ b/torchvision/csrc/cuda/deform_conv2d_kernel.cu @@ -67,16 +67,14 @@ // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp #include -#include #include #include #include #include "cuda_helpers.h" +#include "deform_conv2d_kernel.h" -#include -#include -#include +namespace { const int kMaxParallelImgs = 32; @@ -136,7 +134,7 @@ __device__ scalar_t bilinear_interpolate( } template -__global__ void deformable_im2col_gpu_kernel( +__global__ void deformable_im2col_kernel( int n, const scalar_t* input_ptr, const scalar_t* offset_ptr, @@ -149,8 +147,8 @@ __global__ void deformable_im2col_gpu_kernel( int pad_w, int stride_h, int stride_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int batch_sz, int n_in_channels, int n_offset_grps, @@ -198,8 +196,10 @@ __global__ void deformable_im2col_gpu_kernel( offset_ptr[offset_idx * (out_h * out_w) + out_y * out_w + out_x]; const scalar_t offset_w = offset_ptr [(offset_idx + 1) * (out_h * out_w) + out_y * out_w + out_x]; - const scalar_t y = (out_y * stride_h - pad_h) + i * dil_h + offset_h; - const scalar_t x = (out_x * stride_w - pad_w) + j * dil_w + offset_w; 
+ const scalar_t y = + (out_y * stride_h - pad_h) + i * dilation_h + offset_h; + const scalar_t x = + (out_x * stride_w - pad_w) + j * dilation_w + offset_w; *columns_ptr = mask_value * bilinear_interpolate(input_ptr, height, width, y, x); columns_ptr += batch_sz * out_h * out_w; @@ -208,7 +208,7 @@ __global__ void deformable_im2col_gpu_kernel( } } -static void deformable_im2col( +void deformable_im2col( const at::Tensor& input, const at::Tensor& data_offset, const at::Tensor& data_mask, @@ -221,8 +221,8 @@ static void deformable_im2col( int pad_w, int stride_h, int stride_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int out_h, int out_w, int parallel_imgs, @@ -236,7 +236,7 @@ static void deformable_im2col( AT_DISPATCH_FLOATING_TYPES_AND_HALF( input.scalar_type(), "deformable_im2col_gpu", ([&] { - deformable_im2col_gpu_kernel<<< + deformable_im2col_kernel<<< blocks, threads>>>( num_kernels, @@ -251,8 +251,8 @@ static void deformable_im2col( pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, parallel_imgs, n_in_channels, deformable_group, @@ -268,7 +268,7 @@ static void deformable_im2col( } } -static int get_greatest_divisor_below_bound(int n, int bound) { +int get_greatest_divisor_below_bound(int n, int bound) { for (int k = bound; k > 1; --k) { if (n % k == 0) { return k; @@ -277,217 +277,8 @@ static int get_greatest_divisor_below_bound(int n, int bound) { return 1; } -at::Tensor DeformConv2d_forward_cuda( - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dil_h, - int64_t dil_w, - int64_t n_weight_grps, - int64_t n_offset_grps, - bool use_mask) { - at::Tensor input = input_param.contiguous(); - at::Tensor offset = offset_param.contiguous(); - at::Tensor weight = weight_param.contiguous(); - at::Tensor mask = mask_param.contiguous(); - at::Tensor bias = bias_param.contiguous(); - - TORCH_CHECK(input.ndimension() == 4); - TORCH_CHECK(offset.ndimension() == 4); - TORCH_CHECK(!use_mask || mask.ndimension() == 4); - TORCH_CHECK(weight.ndimension() == 4); - TORCH_CHECK(input.is_cuda(), "input must be a CUDA tensor"); - - at::DeviceGuard guard(input.device()); - - int batch_sz = input.size(0); - int in_channels = input.size(1); - int in_h = input.size(2); - int in_w = input.size(3); - - int n_parallel_imgs = - get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); - - int out_channels = weight.size(0); - int weight_h = weight.size(2); - int weight_w = weight.size(3); - - int ker_h = dil_h * (weight_h - 1) + 1; - int ker_w = dil_w * (weight_w - 1) + 1; - int out_h = ((in_h + 2 * pad_h - ker_h) / stride_h) + 1; - int out_w = ((in_w + 2 * pad_w - ker_w) / stride_w) + 1; - - TORCH_CHECK( - weight_h > 0 && weight_w > 0, - "weight_h: ", - weight_h, - " weight_w: ", - weight_w); - TORCH_CHECK( - stride_h > 0 && stride_w > 0, - "stride_h: ", - stride_h, - " stride_w: ", - stride_w); - TORCH_CHECK(pad_h >= 0 && pad_w >= 0, "pad_h: ", pad_h, " pad_w: ", pad_w); - TORCH_CHECK(dil_h > 0 && dil_w > 0, "dil_h: ", dil_h, " dil_w: ", dil_w); - - TORCH_CHECK(weight.size(1) * n_weight_grps == input.size(1)); - TORCH_CHECK(weight.size(0) % n_weight_grps == 0); - TORCH_CHECK( - (offset.size(1) == n_offset_grps * 2 * weight_h * weight_w), - "offset.shape[1] is not valid: got: ", - offset.size(1), - " expected: ", - n_offset_grps * 2 * weight_h * weight_w); - TORCH_CHECK( - 
(!use_mask || mask.size(1) == n_offset_grps * weight_h * weight_w), - "mask.shape[1] is not valid: got: ", - mask.size(1), - " expected: ", - n_offset_grps * weight_h * weight_w); - TORCH_CHECK(input.size(1) % n_offset_grps == 0); - - TORCH_CHECK( - (offset.size(0) == input.size(0)), "invalid batch size of offset"); - TORCH_CHECK( - (offset.size(2) == out_h && offset.size(3) == out_w), - "offset output dims: (", - offset.size(2), - ", ", - offset.size(3), - ") - ", - "computed output dims: (", - out_h, - ", ", - out_w, - ")"); - TORCH_CHECK((mask.size(0) == input.size(0)), "invalid batch size of mask"); - TORCH_CHECK( - (!use_mask || (mask.size(2) == out_h && mask.size(3) == out_w)), - "mask output dims: (", - mask.size(2), - ", ", - mask.size(3), - ") - ", - "computed output dims: (", - out_h, - ", ", - out_w, - ")"); - TORCH_CHECK( - out_h > 0 && out_w > 0, - "Calculated output size too small - out_h: ", - out_h, - " out_w: ", - out_w); - - auto out = at::zeros({batch_sz, out_channels, out_h, out_w}, input.options()); - if (batch_sz == 0) { - return out; - } - - // Separate batches into blocks - out = out.view({batch_sz / n_parallel_imgs, - n_parallel_imgs, - out_channels, - out_h, - out_w}); - input = input.view( - {batch_sz / n_parallel_imgs, n_parallel_imgs, in_channels, in_h, in_w}); - - offset = offset.view({batch_sz / n_parallel_imgs, - n_parallel_imgs, - n_offset_grps * 2 * weight_h * weight_w, - out_h, - out_w}); - - if (use_mask) { - mask = mask.view({batch_sz / n_parallel_imgs, - n_parallel_imgs, - n_offset_grps * weight_h * weight_w, - out_h, - out_w}); - } - - at::Tensor out_buf = at::zeros( - {batch_sz / n_parallel_imgs, - out_channels, - n_parallel_imgs * out_h, - out_w}, - out.options()); - - // Separate channels into convolution groups - out_buf = out_buf.view({out_buf.size(0), - n_weight_grps, - out_buf.size(1) / n_weight_grps, - out_buf.size(2), - out_buf.size(3)}); - weight = weight.view({n_weight_grps, - weight.size(0) / n_weight_grps, - weight.size(1), - weight.size(2), - weight.size(3)}); - - // Sample points and perform convolution - auto columns = at::zeros( - {in_channels * weight_h * weight_w, n_parallel_imgs * out_h * out_w}, - input.options()); - for (int b = 0; b < batch_sz / n_parallel_imgs; b++) { - deformable_im2col( - input[b], - offset[b], - mask[b], - in_channels, - in_h, - in_w, - weight_h, - weight_w, - pad_h, - pad_w, - stride_h, - stride_w, - dil_h, - dil_w, - out_h, - out_w, - n_parallel_imgs, - n_offset_grps, - use_mask, - columns); - - columns = columns.view( - {n_weight_grps, columns.size(0) / n_weight_grps, columns.size(1)}); - for (int g = 0; g < n_weight_grps; g++) { - out_buf[b][g] = out_buf[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(out_buf[b][g]); - } - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - out_buf = out_buf.view({batch_sz / n_parallel_imgs, - out_channels, - n_parallel_imgs, - out_h, - out_w}); - out_buf.transpose_(1, 2); - out.copy_(out_buf); - out = out.view({batch_sz, out_channels, out_h, out_w}); - - return out + bias.view({1, out_channels, 1, 1}); -} - template -__global__ void deformable_col2im_gpu_kernel( +__global__ void deformable_col2im_kernel( int n, const scalar_t* col, const scalar_t* offset_ptr, @@ -560,7 +351,7 @@ __global__ void deformable_col2im_gpu_kernel( } } -static void compute_grad_input( +void compute_grad_input( const at::Tensor& columns, const at::Tensor& offset, const at::Tensor& mask, @@ -591,7 +382,7 @@ static void 
compute_grad_input( AT_DISPATCH_FLOATING_TYPES_AND_HALF( columns.scalar_type(), "deformable_col2im_gpu", ([&] { - deformable_col2im_gpu_kernel<<< + deformable_col2im_kernel<<< blocks, threads>>>( num_kernels, @@ -657,7 +448,7 @@ __device__ scalar_t get_coordinate_weight( } template -__global__ void deformable_col2im_coord_gpu_kernel( +__global__ void deformable_col2im_coord_kernel( int n, const scalar_t* col_ptr, const scalar_t* im_ptr, @@ -766,7 +557,7 @@ __global__ void deformable_col2im_coord_gpu_kernel( } } -static void compute_grad_offset_and_mask( +void compute_grad_offset_and_mask( const at::Tensor& columns, const at::Tensor& input, const at::Tensor& offset, @@ -799,7 +590,7 @@ static void compute_grad_offset_and_mask( AT_DISPATCH_FLOATING_TYPES_AND_HALF( columns.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - deformable_col2im_coord_gpu_kernel<<< + deformable_col2im_coord_kernel<<< blocks, threads>>>( num_kernels, @@ -835,7 +626,7 @@ static void compute_grad_offset_and_mask( } } -static std::tuple deform_conv2d_backward_input_cuda( +std::tuple backward_gradient_inputs( at::Tensor input, at::Tensor weight, at::Tensor offset, @@ -845,8 +636,8 @@ static std::tuple deform_conv2d_backward_inp int stride_w, int pad_h, int pad_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int n_weight_grps, int n_offset_grps, int n_parallel_imgs, @@ -864,8 +655,10 @@ static std::tuple deform_conv2d_backward_inp int weight_h = weight.size(2); int weight_w = weight.size(3); - long out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) / stride_w + 1; - long out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) / stride_h + 1; + long out_w = + (in_w + 2 * pad_w - (dilation_w * (weight_w - 1) + 1)) / stride_w + 1; + long out_h = + (in_h + 2 * pad_h - (dilation_h * (weight_h - 1) + 1)) / stride_h + 1; auto grad_input = at::zeros_like(input); auto grad_offset = at::zeros_like(offset); @@ -948,8 +741,8 @@ static std::tuple deform_conv2d_backward_inp pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_parallel_imgs, n_offset_grps, use_mask, @@ -969,8 +762,8 @@ static std::tuple deform_conv2d_backward_inp pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_parallel_imgs, n_offset_grps, use_mask, @@ -989,7 +782,7 @@ static std::tuple deform_conv2d_backward_inp return std::make_tuple(grad_input, grad_offset, grad_mask); } -static at::Tensor deform_conv2d_backward_parameters_cuda( +at::Tensor backward_gradient_parameters( at::Tensor input, const at::Tensor& weight, at::Tensor offset, @@ -999,8 +792,8 @@ static at::Tensor deform_conv2d_backward_parameters_cuda( int stride_w, int pad_h, int pad_w, - int dil_h, - int dil_w, + int dilation_h, + int dilation_w, int n_weight_grps, int n_offset_grps, int n_parallel_imgs, @@ -1079,8 +872,8 @@ static at::Tensor deform_conv2d_backward_parameters_cuda( pad_w, stride_h, stride_w, - dil_h, - dil_w, + dilation_h, + dilation_w, out_h, out_w, n_parallel_imgs, @@ -1105,46 +898,264 @@ static at::Tensor deform_conv2d_backward_parameters_cuda( return grad_weight; } +} // namespace + +at::Tensor deform_conv2d_forward_cuda( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask) { + at::Tensor input_c = input.contiguous(); + at::Tensor offset_c = 
offset.contiguous(); + at::Tensor weight_c = weight.contiguous(); + at::Tensor mask_c = mask.contiguous(); + at::Tensor bias_c = bias.contiguous(); + + TORCH_CHECK(input_c.ndimension() == 4); + TORCH_CHECK(offset_c.ndimension() == 4); + TORCH_CHECK(!use_mask || mask_c.ndimension() == 4); + TORCH_CHECK(weight_c.ndimension() == 4); + TORCH_CHECK(input_c.is_cuda(), "input must be a CUDA tensor"); + + at::DeviceGuard guard(input_c.device()); + + int batch_sz = input_c.size(0); + int in_channels = input_c.size(1); + int in_h = input_c.size(2); + int in_w = input_c.size(3); + + int n_parallel_imgs = + get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); + + int out_channels = weight_c.size(0); + int weight_h = weight_c.size(2); + int weight_w = weight_c.size(3); + + int ker_h = dilation_h * (weight_h - 1) + 1; + int ker_w = dilation_w * (weight_w - 1) + 1; + int out_h = ((in_h + 2 * pad_h - ker_h) / stride_h) + 1; + int out_w = ((in_w + 2 * pad_w - ker_w) / stride_w) + 1; + + TORCH_CHECK( + weight_h > 0 && weight_w > 0, + "weight_h: ", + weight_h, + " weight_w: ", + weight_w); + TORCH_CHECK( + stride_h > 0 && stride_w > 0, + "stride_h: ", + stride_h, + " stride_w: ", + stride_w); + TORCH_CHECK(pad_h >= 0 && pad_w >= 0, "pad_h: ", pad_h, " pad_w: ", pad_w); + TORCH_CHECK( + dilation_h > 0 && dilation_w > 0, + "dilation_h: ", + dilation_h, + " dilation_w: ", + dilation_w); + + TORCH_CHECK(weight_c.size(1) * n_weight_grps == input_c.size(1)); + TORCH_CHECK(weight_c.size(0) % n_weight_grps == 0); + TORCH_CHECK( + (offset_c.size(1) == n_offset_grps * 2 * weight_h * weight_w), + "offset.shape[1] is not valid: got: ", + offset_c.size(1), + " expected: ", + n_offset_grps * 2 * weight_h * weight_w); + TORCH_CHECK( + (!use_mask || mask_c.size(1) == n_offset_grps * weight_h * weight_w), + "mask.shape[1] is not valid: got: ", + mask_c.size(1), + " expected: ", + n_offset_grps * weight_h * weight_w); + TORCH_CHECK(input_c.size(1) % n_offset_grps == 0); + + TORCH_CHECK( + (offset_c.size(0) == input_c.size(0)), "invalid batch size of offset"); + TORCH_CHECK( + (offset_c.size(2) == out_h && offset_c.size(3) == out_w), + "offset output dims: (", + offset_c.size(2), + ", ", + offset_c.size(3), + ") - ", + "computed output dims: (", + out_h, + ", ", + out_w, + ")"); + TORCH_CHECK( + (mask_c.size(0) == input_c.size(0)), "invalid batch size of mask"); + TORCH_CHECK( + (!use_mask || (mask_c.size(2) == out_h && mask_c.size(3) == out_w)), + "mask output dims: (", + mask_c.size(2), + ", ", + mask_c.size(3), + ") - ", + "computed output dims: (", + out_h, + ", ", + out_w, + ")"); + TORCH_CHECK( + out_h > 0 && out_w > 0, + "Calculated output size too small - out_h: ", + out_h, + " out_w: ", + out_w); + + auto out = + at::zeros({batch_sz, out_channels, out_h, out_w}, input_c.options()); + if (batch_sz == 0) { + return out; + } + + // Separate batches into blocks + out = out.view({batch_sz / n_parallel_imgs, + n_parallel_imgs, + out_channels, + out_h, + out_w}); + input_c = input_c.view( + {batch_sz / n_parallel_imgs, n_parallel_imgs, in_channels, in_h, in_w}); + + offset_c = offset_c.view({batch_sz / n_parallel_imgs, + n_parallel_imgs, + n_offset_grps * 2 * weight_h * weight_w, + out_h, + out_w}); + + if (use_mask) { + mask_c = mask_c.view({batch_sz / n_parallel_imgs, + n_parallel_imgs, + n_offset_grps * weight_h * weight_w, + out_h, + out_w}); + } + + at::Tensor out_buf = at::zeros( + {batch_sz / n_parallel_imgs, + out_channels, + n_parallel_imgs * out_h, + out_w}, + out.options()); + + // Separate channels into 
convolution groups + out_buf = out_buf.view({out_buf.size(0), + n_weight_grps, + out_buf.size(1) / n_weight_grps, + out_buf.size(2), + out_buf.size(3)}); + weight_c = weight_c.view({n_weight_grps, + weight_c.size(0) / n_weight_grps, + weight_c.size(1), + weight_c.size(2), + weight_c.size(3)}); + + // Sample points and perform convolution + auto columns = at::zeros( + {in_channels * weight_h * weight_w, n_parallel_imgs * out_h * out_w}, + input_c.options()); + for (int b = 0; b < batch_sz / n_parallel_imgs; b++) { + deformable_im2col( + input_c[b], + offset_c[b], + mask_c[b], + in_channels, + in_h, + in_w, + weight_h, + weight_w, + pad_h, + pad_w, + stride_h, + stride_w, + dilation_h, + dilation_w, + out_h, + out_w, + n_parallel_imgs, + n_offset_grps, + use_mask, + columns); + + columns = columns.view( + {n_weight_grps, columns.size(0) / n_weight_grps, columns.size(1)}); + for (int g = 0; g < n_weight_grps; g++) { + out_buf[b][g] = out_buf[b][g] + .flatten(1) + .addmm_(weight_c[g].flatten(1), columns[g]) + .view_as(out_buf[b][g]); + } + columns = + columns.view({columns.size(0) * columns.size(1), columns.size(2)}); + } + + out_buf = out_buf.view({batch_sz / n_parallel_imgs, + out_channels, + n_parallel_imgs, + out_h, + out_w}); + out_buf.transpose_(1, 2); + out.copy_(out_buf); + out = out.view({batch_sz, out_channels, out_h, out_w}); + + return out + bias_c.view({1, out_channels, 1, 1}); +} + std::tuple -DeformConv2d_backward_cuda( - const at::Tensor& grad_out_param, - const at::Tensor& input_param, - const at::Tensor& weight_param, - const at::Tensor& offset_param, - const at::Tensor& mask_param, - const at::Tensor& bias_param, +deform_conv2d_backward_cuda( + const at::Tensor& grad_out, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, int64_t stride_h, int64_t stride_w, int64_t pad_h, int64_t pad_w, - int64_t dil_h, - int64_t dil_w, + int64_t dilation_h, + int64_t dilation_w, int64_t n_weight_grps, int64_t n_offset_grps, bool use_mask) { - at::Tensor grad_out = grad_out_param.contiguous(); - at::Tensor input = input_param.contiguous(); - at::Tensor weight = weight_param.contiguous(); - at::Tensor offset = offset_param.contiguous(); - at::Tensor mask = mask_param.contiguous(); - at::Tensor bias = bias_param.contiguous(); - - const int batch_sz = input.size(0); + at::Tensor grad_out_c = grad_out.contiguous(); + at::Tensor input_c = input.contiguous(); + at::Tensor weight_c = weight.contiguous(); + at::Tensor offset_c = offset.contiguous(); + at::Tensor mask_c = mask.contiguous(); + at::Tensor bias_c = bias.contiguous(); + + const int batch_sz = input_c.size(0); const int n_parallel_imgs = get_greatest_divisor_below_bound(batch_sz, kMaxParallelImgs); - auto grad_input_and_offset_and_mask = deform_conv2d_backward_input_cuda( - input, - weight, - offset, - mask, - grad_out, + auto grad_input_and_offset_and_mask = backward_gradient_inputs( + input_c, + weight_c, + offset_c, + mask_c, + grad_out_c, stride_h, stride_w, pad_h, pad_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_weight_grps, n_offset_grps, n_parallel_imgs, @@ -1154,25 +1165,25 @@ DeformConv2d_backward_cuda( auto grad_offset = std::get<1>(grad_input_and_offset_and_mask); auto grad_mask = std::get<2>(grad_input_and_offset_and_mask); - auto grad_weight = deform_conv2d_backward_parameters_cuda( - input, - weight, - offset, - mask, - grad_out, + auto grad_weight = backward_gradient_parameters( + input_c, + weight_c, + offset_c, + mask_c, + 
grad_out_c, stride_h, stride_w, pad_h, pad_w, - dil_h, - dil_w, + dilation_h, + dilation_w, n_weight_grps, n_offset_grps, n_parallel_imgs, use_mask); - auto value = grad_out.sum({0, 2, 3}); - auto grad_bias = at::ones_like(bias) * value; + auto value = grad_out_c.sum({0, 2, 3}); + auto grad_bias = at::ones_like(bias_c) * value; return std::make_tuple( grad_input, grad_weight, grad_offset, grad_mask, grad_bias); diff --git a/torchvision/csrc/cuda/deform_conv2d_kernel.h b/torchvision/csrc/cuda/deform_conv2d_kernel.h new file mode 100644 index 00000000000..00f3f3dc15d --- /dev/null +++ b/torchvision/csrc/cuda/deform_conv2d_kernel.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor deform_conv2d_forward_cuda( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); + +VISION_API std:: + tuple + deform_conv2d_backward_cuda( + const at::Tensor& grad_out, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t n_weight_grps, + int64_t n_offset_grps, + bool use_mask); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index bf57f1c7967..834973c5327 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -2,40 +2,7 @@ #include #include "../macros.h" -VISION_API at::Tensor DeformConv2d_forward_cuda( - const at::Tensor& input, - const at::Tensor& weight, - const at::Tensor& offset, - const at::Tensor& mask, - const at::Tensor& bias, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dilation_h, - int64_t dilation_w, - int64_t groups, - int64_t deformable_groups, - bool use_mask); - -VISION_API std:: - tuple - DeformConv2d_backward_cuda( - const at::Tensor& grad_out, - const at::Tensor& input, - const at::Tensor& weight, - const at::Tensor& offset, - const at::Tensor& mask, - const at::Tensor& bias, - int64_t stride_h, - int64_t stride_w, - int64_t pad_h, - int64_t pad_w, - int64_t dilation_h, - int64_t dilation_w, - int64_t groups, - int64_t deformable_groups, - bool use_mask); +// TODO: Delete this file once all the methods are gone VISION_API at::Tensor nms_cuda( const at::Tensor& dets, diff --git a/torchvision/csrc/DeformConv.h b/torchvision/csrc/deform_conv2d.cpp similarity index 96% rename from torchvision/csrc/DeformConv.h rename to torchvision/csrc/deform_conv2d.cpp index f8a8dba60e6..74ba630537a 100644 --- a/torchvision/csrc/DeformConv.h +++ b/torchvision/csrc/deform_conv2d.cpp @@ -1,18 +1,10 @@ -#pragma once +#include "deform_conv2d.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include #endif -// TODO: put this stuff in torchvision namespace - at::Tensor deform_conv2d( const at::Tensor& input, const at::Tensor& weight, @@ -49,7 +41,7 @@ at::Tensor deform_conv2d( } #if defined(WITH_CUDA) || defined(WITH_HIP) -at::Tensor DeformConv2d_autocast( +at::Tensor deform_conv2d_autocast( const 
at::Tensor& input, const at::Tensor& weight, const at::Tensor& offset, @@ -123,6 +115,8 @@ _deform_conv2d_backward( use_mask); } +namespace { + class DeformConv2dFunction : public torch::autograd::Function { public: @@ -297,7 +291,9 @@ class DeformConv2dBackwardFunction } }; -at::Tensor DeformConv2d_autograd( +} // namespace + +at::Tensor deform_conv2d_autograd( const at::Tensor& input, const at::Tensor& weight, const at::Tensor& offset, @@ -330,7 +326,7 @@ at::Tensor DeformConv2d_autograd( } std::tuple -DeformConv2d_backward_autograd( +deform_conv2d_backward_autograd( const at::Tensor& grad, const at::Tensor& input, const at::Tensor& weight, diff --git a/torchvision/csrc/deform_conv2d.h b/torchvision/csrc/deform_conv2d.h new file mode 100644 index 00000000000..6adc77fb888 --- /dev/null +++ b/torchvision/csrc/deform_conv2d.h @@ -0,0 +1,100 @@ +#pragma once + +#include "cpu/deform_conv2d_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/deform_conv2d_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/deform_conv2d_kernel.h" +#endif + +// C++ Forward +at::Tensor deform_conv2d( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor deform_conv2d_autocast( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); +#endif + +// C++ Backward +std::tuple +_deform_conv2d_backward( + const at::Tensor& grad, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); + +// Autograd Forward and Backward +at::Tensor deform_conv2d_autograd( + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); + +std::tuple +deform_conv2d_backward_autograd( + const at::Tensor& grad, + const at::Tensor& input, + const at::Tensor& weight, + const at::Tensor& offset, + const at::Tensor& mask, + const at::Tensor& bias, + int64_t stride_h, + int64_t stride_w, + int64_t pad_h, + int64_t pad_w, + int64_t dilation_h, + int64_t dilation_w, + int64_t groups, + int64_t offset_groups, + bool use_mask); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index 44c8346ff7b..2d4e2af0f53 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -8,11 +8,11 @@ #include #endif -#include "DeformConv.h" #include "PSROIAlign.h" #include "PSROIPool.h" #include "ROIAlign.h" #include "ROIPool.h" +#include "deform_conv2d.h" #include "empty_tensor_op.h" #include "nms.h" @@ -62,8 +62,8 @@ TORCH_LIBRARY(torchvision, m) { } TORCH_LIBRARY_IMPL(torchvision, CPU, m) { - m.impl("deform_conv2d", DeformConv2d_forward_cpu); - 
m.impl("_deform_conv2d_backward", DeformConv2d_backward_cpu); + m.impl("deform_conv2d", deform_conv2d_forward_cpu); + m.impl("_deform_conv2d_backward", deform_conv2d_backward_cpu); m.impl("nms", nms_cpu); m.impl("ps_roi_align", PSROIAlign_forward_cpu); m.impl("_ps_roi_align_backward", PSROIAlign_backward_cpu); @@ -78,8 +78,8 @@ TORCH_LIBRARY_IMPL(torchvision, CPU, m) { // TODO: Place this in a hypothetical separate torchvision_cuda library #if defined(WITH_CUDA) || defined(WITH_HIP) TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { - m.impl("deform_conv2d", DeformConv2d_forward_cuda); - m.impl("_deform_conv2d_backward", DeformConv2d_backward_cuda); + m.impl("deform_conv2d", deform_conv2d_forward_cuda); + m.impl("_deform_conv2d_backward", deform_conv2d_backward_cuda); m.impl("nms", nms_cuda); m.impl("ps_roi_align", PSROIAlign_forward_cuda); m.impl("_ps_roi_align_backward", PSROIAlign_backward_cuda); @@ -95,7 +95,7 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { // Autocast only needs to wrap forward pass ops. #if defined(WITH_CUDA) || defined(WITH_HIP) TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { - m.impl("deform_conv2d", DeformConv2d_autocast); + m.impl("deform_conv2d", deform_conv2d_autocast); m.impl("nms", nms_autocast); m.impl("ps_roi_align", PSROIAlign_autocast); m.impl("ps_roi_pool", PSROIPool_autocast); @@ -105,8 +105,8 @@ TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { #endif TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { - m.impl("deform_conv2d", DeformConv2d_autograd); - m.impl("_deform_conv2d_backward", DeformConv2d_backward_autograd); + m.impl("deform_conv2d", deform_conv2d_autograd); + m.impl("_deform_conv2d_backward", deform_conv2d_backward_autograd); m.impl("ps_roi_align", PSROIAlign_autograd); m.impl("_ps_roi_align_backward", PSROIAlign_backward_autograd); m.impl("ps_roi_pool", PSROIPool_autograd); From 44c2eb423a0d6732082e1c06851fb44e31d87759 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 20:19:58 +0000 Subject: [PATCH 2/9] Encapsulate and standardize nms (#3081) * Syncing, where possible, the names of functions across devices. * Adding all internal functions in anonymous namespaces. * Renaming C++/CUDA kernel files and moving operator code from header to cpp file. * Create foreach cpp file a separate header file with "public" functions. * Removing unnecessary repeated includes. * Update CMakeLists.txt to include all headers. 
--- CMakeLists.txt | 10 ++++--- .../csrc/cpu/{nms_cpu.cpp => nms_kernel.cpp} | 10 +++++-- torchvision/csrc/cpu/nms_kernel.h | 9 ++++++ torchvision/csrc/cpu/vision_cpu.h | 5 ---- .../csrc/cuda/{nms_cuda.cu => nms_kernel.cu} | 14 ++++++--- torchvision/csrc/cuda/nms_kernel.h | 9 ++++++ torchvision/csrc/cuda/vision_cuda.h | 5 ---- torchvision/csrc/nms.cpp | 29 +++++++++++++++++++ torchvision/csrc/nms.h | 26 +++++------------ 9 files changed, 77 insertions(+), 40 deletions(-) rename torchvision/csrc/cpu/{nms_cpu.cpp => nms_kernel.cpp} (95%) create mode 100644 torchvision/csrc/cpu/nms_kernel.h rename torchvision/csrc/cuda/{nms_cuda.cu => nms_kernel.cu} (96%) create mode 100644 torchvision/csrc/cuda/nms_kernel.h create mode 100644 torchvision/csrc/nms.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 81ca559d530..e6b97786888 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,9 +32,11 @@ file(GLOB HEADERS torchvision/csrc/*.h) # Image extension file(GLOB IMAGE_HEADERS torchvision/csrc/cpu/image/*.h) file(GLOB IMAGE_SOURCES torchvision/csrc/cpu/image/*.cpp) -file(GLOB OPERATOR_SOURCES torchvision/csrc/cpu/*.h torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp) +file(GLOB OPERATOR_HEADERS torchvision/csrc/cpu/*.h) +file(GLOB OPERATOR_SOURCES ${OPERATOR_HEADERS} torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp) if(WITH_CUDA) - file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} torchvision/csrc/cuda/*.h torchvision/csrc/cuda/*.cu) + file(GLOB OPERATOR_HEADERS ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.h) + file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.cu) endif() file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h) file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp) @@ -95,11 +97,11 @@ install(EXPORT TorchVisionTargets install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}) install(FILES - torchvision/csrc/cpu/vision_cpu.h + ${OPERATOR_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu) if(WITH_CUDA) install(FILES - torchvision/csrc/cuda/vision_cuda.h + ${OPERATOR_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda) endif() install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/models) diff --git a/torchvision/csrc/cpu/nms_cpu.cpp b/torchvision/csrc/cpu/nms_kernel.cpp similarity index 95% rename from torchvision/csrc/cpu/nms_cpu.cpp rename to torchvision/csrc/cpu/nms_kernel.cpp index 00a4c61db7a..036a91f56dc 100644 --- a/torchvision/csrc/cpu/nms_cpu.cpp +++ b/torchvision/csrc/cpu/nms_kernel.cpp @@ -1,7 +1,9 @@ -#include "vision_cpu.h" +#include "nms_kernel.h" + +namespace { template -at::Tensor nms_cpu_kernel( +at::Tensor nms_kernel( const at::Tensor& dets, const at::Tensor& scores, double iou_threshold) { @@ -69,6 +71,8 @@ at::Tensor nms_cpu_kernel( return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); } +} // namespace + at::Tensor nms_cpu( const at::Tensor& dets, const at::Tensor& scores, @@ -95,7 +99,7 @@ at::Tensor nms_cpu( auto result = at::empty({0}, dets.options()); AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { - result = nms_cpu_kernel(dets, scores, iou_threshold); + result = nms_kernel(dets, scores, iou_threshold); }); return result; } diff --git a/torchvision/csrc/cpu/nms_kernel.h b/torchvision/csrc/cpu/nms_kernel.h new file mode 100644 index 00000000000..7b6ef442626 --- /dev/null +++ 
b/torchvision/csrc/cpu/nms_kernel.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor nms_cpu( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index 6f85d9c0256..39d89bf6515 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -4,11 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API at::Tensor nms_cpu( - const at::Tensor& dets, - const at::Tensor& scores, - double iou_threshold); - VISION_API std::tuple PSROIAlign_forward_cpu( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/cuda/nms_cuda.cu b/torchvision/csrc/cuda/nms_kernel.cu similarity index 96% rename from torchvision/csrc/cuda/nms_cuda.cu rename to torchvision/csrc/cuda/nms_kernel.cu index 548dc2f69cb..8785bd84897 100644 --- a/torchvision/csrc/cuda/nms_cuda.cu +++ b/torchvision/csrc/cuda/nms_kernel.cu @@ -3,14 +3,17 @@ #include #include "cuda_helpers.h" +#include "nms_kernel.h" -#include -#include +namespace { int const threadsPerBlock = sizeof(unsigned long long) * 8; template -__device__ inline bool devIoU(T const* const a, T const* const b, const float threshold) { +__device__ inline bool devIoU( + T const* const a, + T const* const b, + const float threshold) { T left = max(a[0], b[0]), right = min(a[2], b[2]); T top = max(a[1], b[1]), bottom = min(a[3], b[3]); T width = max(right - left, (T)0), height = max(bottom - top, (T)0); @@ -29,7 +32,8 @@ __global__ void nms_kernel( const int row_start = blockIdx.y; const int col_start = blockIdx.x; - if (row_start > col_start) return; + if (row_start > col_start) + return; const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); @@ -68,6 +72,8 @@ __global__ void nms_kernel( } } +} // namespace + at::Tensor nms_cuda(const at::Tensor& dets, const at::Tensor& scores, double iou_threshold) { diff --git a/torchvision/csrc/cuda/nms_kernel.h b/torchvision/csrc/cuda/nms_kernel.h new file mode 100644 index 00000000000..1eceddaccf3 --- /dev/null +++ b/torchvision/csrc/cuda/nms_kernel.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor nms_cuda( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index 834973c5327..b17f00d6acf 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -4,11 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API at::Tensor nms_cuda( - const at::Tensor& dets, - const at::Tensor& scores, - double iou_threshold); - VISION_API std::tuple PSROIAlign_forward_cuda( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/nms.cpp b/torchvision/csrc/nms.cpp new file mode 100644 index 00000000000..075f3101937 --- /dev/null +++ b/torchvision/csrc/nms.cpp @@ -0,0 +1,29 @@ +#include "nms.h" +#include + +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include +#endif + +at::Tensor nms( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("torchvision::nms", "") + .typed(); + return op.call(dets, scores, iou_threshold); +} + +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor nms_autocast( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold) { + 
c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); + return nms( + at::autocast::cached_cast(at::kFloat, dets), + at::autocast::cached_cast(at::kFloat, scores), + iou_threshold); +} +#endif diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h index aed675e5d26..87b07548454 100644 --- a/torchvision/csrc/nms.h +++ b/torchvision/csrc/nms.h @@ -1,36 +1,24 @@ #pragma once -#include "cpu/vision_cpu.h" +#include "cpu/nms_kernel.h" #ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" +#include "cuda/nms_kernel.h" #endif #ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#include "hip/nms_kernel.h" #endif -// nms dispatch nexus +// C++ Forward at::Tensor nms( const at::Tensor& dets, const at::Tensor& scores, - double iou_threshold) { - static auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("torchvision::nms", "") - .typed(); - return op.call(dets, scores, iou_threshold); -} + double iou_threshold); +// Autocast Forward #if defined(WITH_CUDA) || defined(WITH_HIP) at::Tensor nms_autocast( const at::Tensor& dets, const at::Tensor& scores, - double iou_threshold) { - c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); - return nms( - at::autocast::cached_cast(at::kFloat, dets), - at::autocast::cached_cast(at::kFloat, scores), - iou_threshold); -} + double iou_threshold); #endif From f8756722f160ccfef1764964b0471baa709f0ec1 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 21:55:05 +0000 Subject: [PATCH 3/9] Encapsulate and standardize ps_roi_align (#3082) * Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API. Syncing, where possible, the names of functions across devices. * Adding all internal functions in anonymous namespaces. * Renaming C++/CUDA kernel files and moving operator code from header to cpp file. * Create foreach cpp file a separate header file with "public" functions. * Removing unnecessary repeated includes. 
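Note on the encapsulation pattern: every operator touched by this series follows the same split. The <op>_kernel.cpp keeps its templated helpers inside an anonymous namespace (internal linkage, so identically named CPU and CUDA helpers cannot collide), and the matching <op>_kernel.h declares only the device-specific entry points that the operator layer needs. A minimal sketch of that layout follows; the operator name my_op and its signature are invented for illustration and are not part of the torchvision sources.

// cpu/my_op_kernel.h -- hypothetical header: only the "public" CPU entry point
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

VISION_API at::Tensor my_op_cpu(const at::Tensor& input, double scale);

// cpu/my_op_kernel.cpp -- hypothetical implementation file
#include "my_op_kernel.h"

namespace {

// Templated worker: the anonymous namespace gives it internal linkage, so it
// never leaks outside this translation unit.
template <typename T>
void my_op_kernel_impl(int64_t n, const T* in, T scale, T* out) {
  for (int64_t i = 0; i < n; ++i) {
    out[i] = in[i] * scale;
  }
}

} // namespace

at::Tensor my_op_cpu(const at::Tensor& input, double scale) {
  auto input_ = input.contiguous();
  auto output = at::empty_like(input_);
  AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "my_op", [&] {
    my_op_kernel_impl<scalar_t>(
        input_.numel(),
        input_.data_ptr<scalar_t>(),
        static_cast<scalar_t>(scale),
        output.data_ptr<scalar_t>());
  });
  return output;
}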
--- ...IAlign_cpu.cpp => ps_roi_align_kernel.cpp} | 28 ++++---- torchvision/csrc/cpu/ps_roi_align_kernel.h | 25 +++++++ torchvision/csrc/cpu/vision_cpu.h | 21 ------ ...OIAlign_cuda.cu => ps_roi_align_kernel.cu} | 28 ++++---- torchvision/csrc/cuda/ps_roi_align_kernel.h | 25 +++++++ torchvision/csrc/cuda/vision_cuda.h | 21 ------ .../csrc/{PSROIAlign.h => ps_roi_align.cpp} | 28 ++++---- torchvision/csrc/ps_roi_align.h | 66 +++++++++++++++++++ torchvision/csrc/vision.cpp | 16 ++--- 9 files changed, 165 insertions(+), 93 deletions(-) rename torchvision/csrc/cpu/{PSROIAlign_cpu.cpp => ps_roi_align_kernel.cpp} (95%) create mode 100644 torchvision/csrc/cpu/ps_roi_align_kernel.h rename torchvision/csrc/cuda/{PSROIAlign_cuda.cu => ps_roi_align_kernel.cu} (95%) create mode 100644 torchvision/csrc/cuda/ps_roi_align_kernel.h rename torchvision/csrc/{PSROIAlign.h => ps_roi_align.cpp} (93%) create mode 100644 torchvision/csrc/ps_roi_align.h diff --git a/torchvision/csrc/cpu/PSROIAlign_cpu.cpp b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp similarity index 95% rename from torchvision/csrc/cpu/PSROIAlign_cpu.cpp rename to torchvision/csrc/cpu/ps_roi_align_kernel.cpp index 899dbb208b6..a56fbe58e9a 100644 --- a/torchvision/csrc/cpu/PSROIAlign_cpu.cpp +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include "ps_roi_align_kernel.h" + +namespace { template T bilinear_interpolate( @@ -57,7 +57,7 @@ T bilinear_interpolate( } template -void PSROIAlignForwardCPU( +void ps_roi_align_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -202,7 +202,7 @@ inline void add(T* address, const T& val) { } template -void PSROIAlignBackwardCPU( +void ps_roi_align_backward_kernel_impl( int nthreads, const T* grad_output, const int* channel_mapping, @@ -298,7 +298,9 @@ void PSROIAlignBackwardCPU( } } -std::tuple PSROIAlign_forward_cpu( +} // namespace + +std::tuple ps_roi_align_forward_cpu( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -313,7 +315,7 @@ std::tuple PSROIAlign_forward_cpu( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "PSROIAlign_forward_cpu"; + at::CheckedFrom c = "ps_roi_align_forward_cpu"; at::checkAllSameType(c, {input_t, rois_t}); int num_rois = rois.size(0); @@ -338,8 +340,8 @@ std::tuple PSROIAlign_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "PSROIAlign_forward", [&] { - PSROIAlignForwardCPU( + input.scalar_type(), "ps_roi_align_forward", [&] { + ps_roi_align_forward_kernel_impl( output_size, input_.data_ptr(), spatial_scale, @@ -357,7 +359,7 @@ std::tuple PSROIAlign_forward_cpu( return std::make_tuple(output, channel_mapping); } -at::Tensor PSROIAlign_backward_cpu( +at::Tensor ps_roi_align_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, @@ -379,7 +381,7 @@ at::Tensor PSROIAlign_backward_cpu( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; - at::CheckedFrom c = "PSROIAlign_backward_cpu"; + at::CheckedFrom c = "ps_roi_align_backward_cpu"; at::checkAllSameType(c, {grad_t, rois_t}); auto num_rois = rois.size(0); @@ -395,8 +397,8 @@ at::Tensor PSROIAlign_backward_cpu( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "PSROIAlign_backward", [&] { - PSROIAlignBackwardCPU( + grad.scalar_type(), 
"ps_roi_align_backward", [&] { + ps_roi_align_backward_kernel_impl( grad.numel(), grad_.data_ptr(), channel_mapping.data_ptr(), diff --git a/torchvision/csrc/cpu/ps_roi_align_kernel.h b/torchvision/csrc/cpu/ps_roi_align_kernel.h new file mode 100644 index 00000000000..86a3f9a8876 --- /dev/null +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple ps_roi_align_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +VISION_API at::Tensor ps_roi_align_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index 39d89bf6515..22119b5e292 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -4,27 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API std::tuple PSROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio); - -VISION_API at::Tensor PSROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& channel_mapping, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); - VISION_API std::tuple PSROIPool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/cuda/PSROIAlign_cuda.cu b/torchvision/csrc/cuda/ps_roi_align_kernel.cu similarity index 95% rename from torchvision/csrc/cuda/PSROIAlign_cuda.cu rename to torchvision/csrc/cuda/ps_roi_align_kernel.cu index e6912d8c7ee..4ac0c28de4c 100644 --- a/torchvision/csrc/cuda/PSROIAlign_cuda.cu +++ b/torchvision/csrc/cuda/ps_roi_align_kernel.cu @@ -1,11 +1,11 @@ -#include -#include #include #include #include -#include #include "cuda_helpers.h" +#include "ps_roi_align_kernel.h" + +namespace { template __device__ T bilinear_interpolate( @@ -62,7 +62,7 @@ __device__ T bilinear_interpolate( } template -__global__ void PSROIAlignForwardCUDA( +__global__ void ps_roi_align_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -195,7 +195,7 @@ __device__ void bilinear_interpolate_gradient( } template -__global__ void PSROIAlignBackwardCUDA( +__global__ void ps_roi_align_backward_kernel_impl( int nthreads, const T* grad_output, const int* channel_mapping, @@ -292,7 +292,9 @@ __global__ void PSROIAlignBackwardCUDA( } } -std::tuple PSROIAlign_forward_cuda( +} // namespace + +std::tuple ps_roi_align_forward_cuda( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -307,7 +309,7 @@ std::tuple PSROIAlign_forward_cuda( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "PSROIAlign_forward_cuda"; + at::CheckedFrom c = "ps_roi_align_forward_cuda"; at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t}); @@ -344,8 +346,8 @@ std::tuple PSROIAlign_forward_cuda( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), 
"PSROIAlign_forward", [&] { - PSROIAlignForwardCUDA<<>>( + input.scalar_type(), "ps_roi_align_forward", [&] { + ps_roi_align_forward_kernel_impl<<>>( output_size, input_.data_ptr(), spatial_scale, @@ -365,7 +367,7 @@ std::tuple PSROIAlign_forward_cuda( return std::make_tuple(output, channel_mapping); } -at::Tensor PSROIAlign_backward_cuda( +at::Tensor ps_roi_align_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, @@ -387,7 +389,7 @@ at::Tensor PSROIAlign_backward_cuda( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; - at::CheckedFrom c = "PSROIAlign_backward_cuda"; + at::CheckedFrom c = "ps_roi_align_backward_cuda"; at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t}); at::checkAllSameType(c, {grad_t, rois_t}); @@ -415,8 +417,8 @@ at::Tensor PSROIAlign_backward_cuda( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "PSROIAlign_backward", [&] { - PSROIAlignBackwardCUDA<<>>( + grad.scalar_type(), "ps_roi_align_backward", [&] { + ps_roi_align_backward_kernel_impl<<>>( grad.numel(), grad_.data_ptr(), channel_mapping.data_ptr(), diff --git a/torchvision/csrc/cuda/ps_roi_align_kernel.h b/torchvision/csrc/cuda/ps_roi_align_kernel.h new file mode 100644 index 00000000000..45a300d6711 --- /dev/null +++ b/torchvision/csrc/cuda/ps_roi_align_kernel.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple ps_roi_align_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +VISION_API at::Tensor ps_roi_align_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index b17f00d6acf..c80386a8db1 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -4,27 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API std::tuple PSROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio); - -VISION_API at::Tensor PSROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& channel_mapping, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); - VISION_API std::tuple PSROIPool_forward_cuda( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/PSROIAlign.h b/torchvision/csrc/ps_roi_align.cpp similarity index 93% rename from torchvision/csrc/PSROIAlign.h rename to torchvision/csrc/ps_roi_align.cpp index 1e5dd17aabc..0e1a30d6e63 100644 --- a/torchvision/csrc/PSROIAlign.h +++ b/torchvision/csrc/ps_roi_align.cpp @@ -1,20 +1,10 @@ -#pragma once +#include "ps_roi_align.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include 
#endif -#include - -// TODO: put this stuff in torchvision namespace - std::tuple ps_roi_align( const at::Tensor& input, const at::Tensor& rois, @@ -30,7 +20,7 @@ std::tuple ps_roi_align( } #if defined(WITH_CUDA) || defined(WITH_HIP) -std::tuple PSROIAlign_autocast( +std::tuple ps_roi_align_autocast( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -82,6 +72,8 @@ at::Tensor _ps_roi_align_backward( width); } +namespace { + class PSROIAlignFunction : public torch::autograd::Function { public: @@ -186,7 +178,9 @@ class PSROIAlignBackwardFunction } }; -std::tuple PSROIAlign_autograd( +} // namespace + +std::tuple ps_roi_align_autograd( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -199,7 +193,7 @@ std::tuple PSROIAlign_autograd( return std::make_tuple(result[0], result[1]); } -at::Tensor PSROIAlign_backward_autograd( +at::Tensor ps_roi_align_backward_autograd( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, diff --git a/torchvision/csrc/ps_roi_align.h b/torchvision/csrc/ps_roi_align.h new file mode 100644 index 00000000000..0f7ecea2f12 --- /dev/null +++ b/torchvision/csrc/ps_roi_align.h @@ -0,0 +1,66 @@ +#pragma once + +#include "cpu/ps_roi_align_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/ps_roi_align_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/ps_roi_align_kernel.h" +#endif + +// C++ Forward +std::tuple ps_roi_align( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +std::tuple ps_roi_align_autocast( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); +#endif + +// C++ Backward +at::Tensor _ps_roi_align_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); + +// Autograd Forward and Backward +std::tuple ps_roi_align_autograd( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio); + +at::Tensor ps_roi_align_backward_autograd( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index 2d4e2af0f53..c5c204aac2b 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -8,13 +8,13 @@ #include #endif -#include "PSROIAlign.h" #include "PSROIPool.h" #include "ROIAlign.h" #include "ROIPool.h" #include "deform_conv2d.h" #include "empty_tensor_op.h" #include "nms.h" +#include "ps_roi_align.h" // If we are in a Windows environment, we need to define // initialization functions for the _custom_ops extension @@ -65,8 +65,8 @@ TORCH_LIBRARY_IMPL(torchvision, CPU, m) { m.impl("deform_conv2d", deform_conv2d_forward_cpu); m.impl("_deform_conv2d_backward", deform_conv2d_backward_cpu); m.impl("nms", nms_cpu); - m.impl("ps_roi_align", PSROIAlign_forward_cpu); - m.impl("_ps_roi_align_backward", PSROIAlign_backward_cpu); + 
m.impl("ps_roi_align", ps_roi_align_forward_cpu); + m.impl("_ps_roi_align_backward", ps_roi_align_backward_cpu); m.impl("ps_roi_pool", PSROIPool_forward_cpu); m.impl("_ps_roi_pool_backward", PSROIPool_backward_cpu); m.impl("roi_align", ROIAlign_forward_cpu); @@ -81,8 +81,8 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { m.impl("deform_conv2d", deform_conv2d_forward_cuda); m.impl("_deform_conv2d_backward", deform_conv2d_backward_cuda); m.impl("nms", nms_cuda); - m.impl("ps_roi_align", PSROIAlign_forward_cuda); - m.impl("_ps_roi_align_backward", PSROIAlign_backward_cuda); + m.impl("ps_roi_align", ps_roi_align_forward_cuda); + m.impl("_ps_roi_align_backward", ps_roi_align_backward_cuda); m.impl("ps_roi_pool", PSROIPool_forward_cuda); m.impl("_ps_roi_pool_backward", PSROIPool_backward_cuda); m.impl("roi_align", ROIAlign_forward_cuda); @@ -97,7 +97,7 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { m.impl("deform_conv2d", deform_conv2d_autocast); m.impl("nms", nms_autocast); - m.impl("ps_roi_align", PSROIAlign_autocast); + m.impl("ps_roi_align", ps_roi_align_autocast); m.impl("ps_roi_pool", PSROIPool_autocast); m.impl("roi_align", ROIAlign_autocast); m.impl("roi_pool", ROIPool_autocast); @@ -107,8 +107,8 @@ TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { m.impl("deform_conv2d", deform_conv2d_autograd); m.impl("_deform_conv2d_backward", deform_conv2d_backward_autograd); - m.impl("ps_roi_align", PSROIAlign_autograd); - m.impl("_ps_roi_align_backward", PSROIAlign_backward_autograd); + m.impl("ps_roi_align", ps_roi_align_autograd); + m.impl("_ps_roi_align_backward", ps_roi_align_backward_autograd); m.impl("ps_roi_pool", PSROIPool_autograd); m.impl("_ps_roi_pool_backward", PSROIPool_backward_autograd); m.impl("roi_align", ROIAlign_autograd); From 750bde315e3ec7591fde7544f8f9af4d8e9b666d Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 23:05:45 +0000 Subject: [PATCH 4/9] Encapsulate and standardize ps_roi_pool (#3084) * Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API. * Adding all internal functions in anonymous namespaces. * Renaming C++/CUDA kernel files and moving operator code from header to cpp file. * Create foreach cpp file a separate header file with "public" functions. * Removing unnecessary repeated includes. 
--- ...ROIPool_cpu.cpp => ps_roi_pool_kernel.cpp} | 29 ++++----- torchvision/csrc/cpu/ps_roi_pool_kernel.h | 23 +++++++ torchvision/csrc/cpu/vision_cpu.h | 19 ------ ...SROIPool_cuda.cu => ps_roi_pool_kernel.cu} | 27 ++++---- torchvision/csrc/cuda/ps_roi_pool_kernel.h | 23 +++++++ torchvision/csrc/cuda/vision_cuda.h | 19 ------ .../csrc/{PSROIPool.h => ps_roi_pool.cpp} | 26 ++++---- torchvision/csrc/ps_roi_pool.h | 61 +++++++++++++++++++ torchvision/csrc/vision.cpp | 16 ++--- 9 files changed, 156 insertions(+), 87 deletions(-) rename torchvision/csrc/cpu/{PSROIPool_cpu.cpp => ps_roi_pool_kernel.cpp} (93%) create mode 100644 torchvision/csrc/cpu/ps_roi_pool_kernel.h rename torchvision/csrc/cuda/{PSROIPool_cuda.cu => ps_roi_pool_kernel.cu} (93%) create mode 100644 torchvision/csrc/cuda/ps_roi_pool_kernel.h rename torchvision/csrc/{PSROIPool.h => ps_roi_pool.cpp} (93%) create mode 100644 torchvision/csrc/ps_roi_pool.h diff --git a/torchvision/csrc/cpu/PSROIPool_cpu.cpp b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp similarity index 93% rename from torchvision/csrc/cpu/PSROIPool_cpu.cpp rename to torchvision/csrc/cpu/ps_roi_pool_kernel.cpp index c6e0a64cac3..171de9edc6a 100644 --- a/torchvision/csrc/cpu/PSROIPool_cpu.cpp +++ b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp @@ -1,7 +1,6 @@ -#include -#include -#include -#include +#include "ps_roi_pool_kernel.h" + +namespace { template inline void add(T* address, const T& val) { @@ -9,7 +8,7 @@ inline void add(T* address, const T& val) { } template -void PSROIPoolForward( +void ps_roi_pool_forward_kernel_impl( const T* input, const T spatial_scale, int channels, @@ -79,7 +78,7 @@ void PSROIPoolForward( } template -void PSROIPoolBackward( +void ps_roi_pool_backward_kernel_impl( const T* grad_output, const int* channel_mapping, int num_rois, @@ -143,7 +142,9 @@ void PSROIPoolBackward( } } -std::tuple PSROIPool_forward_cpu( +} // namespace + +std::tuple ps_roi_pool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -157,7 +158,7 @@ std::tuple PSROIPool_forward_cpu( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "PSROIPool_forward_cpu"; + at::CheckedFrom c = "ps_roi_pool_forward_cpu"; at::checkAllSameType(c, {input_t, rois_t}); int num_rois = rois.size(0); @@ -182,8 +183,8 @@ std::tuple PSROIPool_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "PSROIPool_forward", [&] { - PSROIPoolForward( + input.scalar_type(), "ps_roi_pool_forward", [&] { + ps_roi_pool_forward_kernel_impl( input_.data_ptr(), spatial_scale, channels, @@ -200,7 +201,7 @@ std::tuple PSROIPool_forward_cpu( return std::make_tuple(output, channel_mapping); } -at::Tensor PSROIPool_backward_cpu( +at::Tensor ps_roi_pool_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, @@ -221,7 +222,7 @@ at::Tensor PSROIPool_backward_cpu( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; - at::CheckedFrom c = "PSROIPool_backward_cpu"; + at::CheckedFrom c = "ps_roi_pool_backward_cpu"; at::checkAllSameType(c, {grad_t, rois_t}); auto num_rois = rois.size(0); @@ -237,8 +238,8 @@ at::Tensor PSROIPool_backward_cpu( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "PSROIPool_backward", [&] { - PSROIPoolBackward( + grad.scalar_type(), "ps_roi_pool_backward", [&] { + 
ps_roi_pool_backward_kernel_impl( grad_.data_ptr(), channel_mapping.data_ptr(), num_rois, diff --git a/torchvision/csrc/cpu/ps_roi_pool_kernel.h b/torchvision/csrc/cpu/ps_roi_pool_kernel.h new file mode 100644 index 00000000000..14a4e22681a --- /dev/null +++ b/torchvision/csrc/cpu/ps_roi_pool_kernel.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple ps_roi_pool_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +VISION_API at::Tensor ps_roi_pool_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index 22119b5e292..baf64f89689 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -4,25 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API std::tuple PSROIPool_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width); - -VISION_API at::Tensor PSROIPool_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& channel_mapping, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); - VISION_API at::Tensor ROIAlign_forward_cpu( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/cuda/PSROIPool_cuda.cu b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu similarity index 93% rename from torchvision/csrc/cuda/PSROIPool_cuda.cu rename to torchvision/csrc/cuda/ps_roi_pool_kernel.cu index ab6a50b009c..aa1c834e059 100644 --- a/torchvision/csrc/cuda/PSROIPool_cuda.cu +++ b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu @@ -1,13 +1,14 @@ -#include -#include #include #include #include #include "cuda_helpers.h" +#include "ps_roi_pool_kernel.h" + +namespace { template -__global__ void PSROIPoolForward( +__global__ void ps_roi_pool_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -73,7 +74,7 @@ __global__ void PSROIPoolForward( } template -__global__ void PSROIPoolBackward( +__global__ void ps_roi_pool_backward_kernel_impl( int nthreads, const T* grad_output, const int* channel_mapping, @@ -132,7 +133,9 @@ __global__ void PSROIPoolBackward( } } -std::tuple PSROIPool_forward_cuda( +} // namespace + +std::tuple ps_roi_pool_forward_cuda( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -146,7 +149,7 @@ std::tuple PSROIPool_forward_cuda( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "PSROIPool_forward_cuda"; + at::CheckedFrom c = "ps_roi_pool_forward_cuda"; at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t}); @@ -183,8 +186,8 @@ std::tuple PSROIPool_forward_cuda( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "PSROIPool_forward", [&] { - PSROIPoolForward<<>>( + input.scalar_type(), "ps_roi_pool_forward", [&] { + ps_roi_pool_forward_kernel_impl<<>>( output_size, input_.data_ptr(), spatial_scale, @@ -202,7 +205,7 @@ std::tuple PSROIPool_forward_cuda( return std::make_tuple(output, channel_mapping); } -at::Tensor 
PSROIPool_backward_cuda( +at::Tensor ps_roi_pool_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, @@ -223,7 +226,7 @@ at::Tensor PSROIPool_backward_cuda( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; - at::CheckedFrom c = "PSROIPool_backward_cuda"; + at::CheckedFrom c = "ps_roi_pool_backward_cuda"; at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t}); at::checkAllSameType(c, {grad_t, rois_t}); @@ -251,8 +254,8 @@ at::Tensor PSROIPool_backward_cuda( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "PSROIPool_backward", [&] { - PSROIPoolBackward<<>>( + grad.scalar_type(), "ps_roi_pool_backward", [&] { + ps_roi_pool_backward_kernel_impl<<>>( grad.numel(), grad_.data_ptr(), channel_mapping.data_ptr(), diff --git a/torchvision/csrc/cuda/ps_roi_pool_kernel.h b/torchvision/csrc/cuda/ps_roi_pool_kernel.h new file mode 100644 index 00000000000..e97f0ee7065 --- /dev/null +++ b/torchvision/csrc/cuda/ps_roi_pool_kernel.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple ps_roi_pool_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +VISION_API at::Tensor ps_roi_pool_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index c80386a8db1..8d411b9c67e 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -4,25 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API std::tuple PSROIPool_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width); - -VISION_API at::Tensor PSROIPool_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& channel_mapping, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); - VISION_API at::Tensor ROIAlign_forward_cuda( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/PSROIPool.h b/torchvision/csrc/ps_roi_pool.cpp similarity index 93% rename from torchvision/csrc/PSROIPool.h rename to torchvision/csrc/ps_roi_pool.cpp index c3ced9e7842..76fb2d04be7 100644 --- a/torchvision/csrc/PSROIPool.h +++ b/torchvision/csrc/ps_roi_pool.cpp @@ -1,18 +1,10 @@ -#pragma once +#include "ps_roi_pool.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include #endif -// TODO: put this stuff in torchvision namespace - std::tuple ps_roi_pool( const at::Tensor& input, const at::Tensor& rois, @@ -26,7 +18,7 @@ std::tuple ps_roi_pool( } #if defined(WITH_CUDA) || defined(WITH_HIP) -std::tuple PSROIPool_autocast( +std::tuple ps_roi_pool_autocast( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -74,6 +66,8 @@ at::Tensor _ps_roi_pool_backward( width); } +namespace { + class 
PSROIPoolFunction : public torch::autograd::Function { public: static torch::autograd::variable_list forward( @@ -166,7 +160,9 @@ class PSROIPoolBackwardFunction } }; -std::tuple PSROIPool_autograd( +} // namespace + +std::tuple ps_roi_pool_autograd( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -178,7 +174,7 @@ std::tuple PSROIPool_autograd( return std::make_tuple(result[0], result[1]); } -at::Tensor PSROIPool_backward_autograd( +at::Tensor ps_roi_pool_backward_autograd( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& channel_mapping, diff --git a/torchvision/csrc/ps_roi_pool.h b/torchvision/csrc/ps_roi_pool.h new file mode 100644 index 00000000000..0c8baef4a9a --- /dev/null +++ b/torchvision/csrc/ps_roi_pool.h @@ -0,0 +1,61 @@ +#pragma once + +#include "cpu/ps_roi_pool_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/ps_roi_pool_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/ps_roi_pool_kernel.h" +#endif + +// C++ Forward +std::tuple ps_roi_pool( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +std::tuple ps_roi_pool_autocast( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); +#endif + +// C++ Backward +at::Tensor _ps_roi_pool_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); + +// Autograd Forward and Backward +std::tuple ps_roi_pool_autograd( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +at::Tensor ps_roi_pool_backward_autograd( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& channel_mapping, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index c5c204aac2b..6f540c6832e 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -8,13 +8,13 @@ #include #endif -#include "PSROIPool.h" #include "ROIAlign.h" #include "ROIPool.h" #include "deform_conv2d.h" #include "empty_tensor_op.h" #include "nms.h" #include "ps_roi_align.h" +#include "ps_roi_pool.h" // If we are in a Windows environment, we need to define // initialization functions for the _custom_ops extension @@ -67,8 +67,8 @@ TORCH_LIBRARY_IMPL(torchvision, CPU, m) { m.impl("nms", nms_cpu); m.impl("ps_roi_align", ps_roi_align_forward_cpu); m.impl("_ps_roi_align_backward", ps_roi_align_backward_cpu); - m.impl("ps_roi_pool", PSROIPool_forward_cpu); - m.impl("_ps_roi_pool_backward", PSROIPool_backward_cpu); + m.impl("ps_roi_pool", ps_roi_pool_forward_cpu); + m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cpu); m.impl("roi_align", ROIAlign_forward_cpu); m.impl("_roi_align_backward", ROIAlign_backward_cpu); m.impl("roi_pool", ROIPool_forward_cpu); @@ -83,8 +83,8 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { m.impl("nms", nms_cuda); m.impl("ps_roi_align", ps_roi_align_forward_cuda); m.impl("_ps_roi_align_backward", ps_roi_align_backward_cuda); - m.impl("ps_roi_pool", PSROIPool_forward_cuda); - m.impl("_ps_roi_pool_backward", PSROIPool_backward_cuda); + m.impl("ps_roi_pool", 
ps_roi_pool_forward_cuda); + m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cuda); m.impl("roi_align", ROIAlign_forward_cuda); m.impl("_roi_align_backward", ROIAlign_backward_cuda); m.impl("roi_pool", ROIPool_forward_cuda); @@ -98,7 +98,7 @@ TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { m.impl("deform_conv2d", deform_conv2d_autocast); m.impl("nms", nms_autocast); m.impl("ps_roi_align", ps_roi_align_autocast); - m.impl("ps_roi_pool", PSROIPool_autocast); + m.impl("ps_roi_pool", ps_roi_pool_autocast); m.impl("roi_align", ROIAlign_autocast); m.impl("roi_pool", ROIPool_autocast); } @@ -109,8 +109,8 @@ TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { m.impl("_deform_conv2d_backward", deform_conv2d_backward_autograd); m.impl("ps_roi_align", ps_roi_align_autograd); m.impl("_ps_roi_align_backward", ps_roi_align_backward_autograd); - m.impl("ps_roi_pool", PSROIPool_autograd); - m.impl("_ps_roi_pool_backward", PSROIPool_backward_autograd); + m.impl("ps_roi_pool", ps_roi_pool_autograd); + m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_autograd); m.impl("roi_align", ROIAlign_autograd); m.impl("_roi_align_backward", ROIAlign_backward_autograd); m.impl("roi_pool", ROIPool_autograd); From ba02b2fb98cec0ec2f78c1344c76ac7840302f51 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Dec 2020 00:07:07 +0000 Subject: [PATCH 5/9] Encapsulate and standardize roi_align (#3085) * Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API. * Adding all internal functions in anonymous namespaces. * Renaming C++/CUDA kernel files and moving operator code from header to cpp file. * Create foreach cpp file a separate header file with "public" functions. * Removing unnecessary repeated includes. --- test/tracing/frcnn/test_frcnn_tracing.cpp | 2 +- ...{ROIAlign_cpu.cpp => roi_align_kernel.cpp} | 29 ++++---- torchvision/csrc/cpu/roi_align_kernel.h | 26 +++++++ torchvision/csrc/cpu/vision_cpu.h | 22 ------ .../{ROIAlign_cuda.cu => roi_align_kernel.cu} | 29 ++++---- torchvision/csrc/cuda/roi_align_kernel.h | 26 +++++++ torchvision/csrc/cuda/vision_cuda.h | 22 ------ .../csrc/{ROIAlign.h => roi_align.cpp} | 27 +++----- torchvision/csrc/roi_align.h | 69 +++++++++++++++++++ torchvision/csrc/vision.cpp | 16 ++--- 10 files changed, 173 insertions(+), 95 deletions(-) rename torchvision/csrc/cpu/{ROIAlign_cpu.cpp => roi_align_kernel.cpp} (96%) create mode 100644 torchvision/csrc/cpu/roi_align_kernel.h rename torchvision/csrc/cuda/{ROIAlign_cuda.cu => roi_align_kernel.cu} (94%) create mode 100644 torchvision/csrc/cuda/roi_align_kernel.h rename torchvision/csrc/{ROIAlign.h => roi_align.cpp} (94%) create mode 100644 torchvision/csrc/roi_align.h diff --git a/test/tracing/frcnn/test_frcnn_tracing.cpp b/test/tracing/frcnn/test_frcnn_tracing.cpp index a23b95cf88f..95b3a1b5726 100644 --- a/test/tracing/frcnn/test_frcnn_tracing.cpp +++ b/test/tracing/frcnn/test_frcnn_tracing.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/torchvision/csrc/cpu/ROIAlign_cpu.cpp b/torchvision/csrc/cpu/roi_align_kernel.cpp similarity index 96% rename from torchvision/csrc/cpu/ROIAlign_cpu.cpp rename to torchvision/csrc/cpu/roi_align_kernel.cpp index 10ebd8158cc..01d2bca25a3 100644 --- a/torchvision/csrc/cpu/ROIAlign_cpu.cpp +++ b/torchvision/csrc/cpu/roi_align_kernel.cpp @@ -1,5 +1,6 @@ -#include -#include "vision_cpu.h" +#include "roi_align_kernel.h" + +namespace { // implementation taken from Caffe2 template @@ -111,7 +112,7 @@ void 
pre_calc_for_bilinear_interpolate( } template -void ROIAlignForward( +void roi_align_forward_kernel_impl( int nthreads, const T* input, const T& spatial_scale, @@ -277,7 +278,7 @@ inline void add(T* address, const T& val) { } template -void ROIAlignBackward( +void roi_align_backward_kernel_impl( int nthreads, const T* grad_output, const T& spatial_scale, @@ -382,9 +383,11 @@ void ROIAlignBackward( } // ix } // iy } // for -} // ROIAlignBackward +} + +} // namespace -at::Tensor ROIAlign_forward_cpu( +at::Tensor roi_align_forward_cpu( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -398,7 +401,7 @@ at::Tensor ROIAlign_forward_cpu( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_forward_cpu"; + at::CheckedFrom c = "roi_align_forward_cpu"; at::checkAllSameType(c, {input_t, rois_t}); auto num_rois = rois.size(0); @@ -416,8 +419,8 @@ at::Tensor ROIAlign_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIAlign_forward", [&] { - ROIAlignForward( + input.scalar_type(), "roi_align_forward", [&] { + roi_align_forward_kernel_impl( output_size, input_.data_ptr(), spatial_scale, @@ -434,7 +437,7 @@ at::Tensor ROIAlign_forward_cpu( return output; } -at::Tensor ROIAlign_backward_cpu( +at::Tensor roi_align_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, double spatial_scale, @@ -451,7 +454,7 @@ at::Tensor ROIAlign_backward_cpu( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cpu"; + at::CheckedFrom c = "roi_align_backward_cpu"; at::checkAllSameType(c, {grad_t, rois_t}); at::Tensor grad_input = @@ -470,8 +473,8 @@ at::Tensor ROIAlign_backward_cpu( auto rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIAlign_forward", [&] { - ROIAlignBackward( + grad.scalar_type(), "roi_align_forward", [&] { + roi_align_backward_kernel_impl( grad.numel(), grad.data_ptr(), spatial_scale, diff --git a/torchvision/csrc/cpu/roi_align_kernel.h b/torchvision/csrc/cpu/roi_align_kernel.h new file mode 100644 index 00000000000..79fd46bd44e --- /dev/null +++ b/torchvision/csrc/cpu/roi_align_kernel.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor roi_align_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + bool aligned); + +VISION_API at::Tensor roi_align_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width, + int64_t sampling_ratio, + bool aligned); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index baf64f89689..a2647c57aa5 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -4,28 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio, - bool aligned); - -VISION_API at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t batch_size, - int64_t channels, - int64_t 
height, - int64_t width, - int64_t sampling_ratio, - bool aligned); - VISION_API std::tuple ROIPool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/cuda/ROIAlign_cuda.cu b/torchvision/csrc/cuda/roi_align_kernel.cu similarity index 94% rename from torchvision/csrc/cuda/ROIAlign_cuda.cu rename to torchvision/csrc/cuda/roi_align_kernel.cu index b773121d2b9..7f763170a9e 100644 --- a/torchvision/csrc/cuda/ROIAlign_cuda.cu +++ b/torchvision/csrc/cuda/roi_align_kernel.cu @@ -1,10 +1,11 @@ -#include -#include #include #include #include #include "cuda_helpers.h" +#include "roi_align_kernel.h" + +namespace { template __device__ T bilinear_interpolate( @@ -61,7 +62,7 @@ __device__ T bilinear_interpolate( } template -__global__ void RoIAlignForward( +__global__ void roi_align_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -197,7 +198,7 @@ __device__ void bilinear_interpolate_gradient( } template -__global__ void RoIAlignBackward( +__global__ void roi_align_backward_kernel_impl( int nthreads, const T* grad_output, const T spatial_scale, @@ -308,9 +309,11 @@ __global__ void RoIAlignBackward( } // ix } // iy } // CUDA_1D_KERNEL_LOOP -} // RoIAlignBackward +} + +} // namespace -at::Tensor ROIAlign_forward_cuda( +at::Tensor roi_align_forward_cuda( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -325,7 +328,7 @@ at::Tensor ROIAlign_forward_cuda( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_forward_cuda"; + at::CheckedFrom c = "roi_align_forward_cuda"; at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t}); @@ -354,8 +357,8 @@ at::Tensor ROIAlign_forward_cuda( auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "ROIAlign_forward", [&] { - RoIAlignForward<<>>( + AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_align_forward", [&] { + roi_align_forward_kernel_impl<<>>( output_size, input_.data_ptr(), spatial_scale, @@ -373,7 +376,7 @@ at::Tensor ROIAlign_forward_cuda( return output; } -at::Tensor ROIAlign_backward_cuda( +at::Tensor roi_align_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, double spatial_scale, @@ -390,7 +393,7 @@ at::Tensor ROIAlign_backward_cuda( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; + at::CheckedFrom c = "roi_align_backward_cuda"; at::checkAllSameGPU(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t}); @@ -418,8 +421,8 @@ at::Tensor ROIAlign_backward_cuda( int w_stride = grad.stride(3); auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "ROIAlign_backward", [&] { - RoIAlignBackward<<>>( + AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_align_backward", [&] { + roi_align_backward_kernel_impl<<>>( grad.numel(), grad.data_ptr(), spatial_scale, diff --git a/torchvision/csrc/cuda/roi_align_kernel.h b/torchvision/csrc/cuda/roi_align_kernel.h new file mode 100644 index 00000000000..46054f04f38 --- /dev/null +++ b/torchvision/csrc/cuda/roi_align_kernel.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor roi_align_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + bool aligned); + +VISION_API at::Tensor roi_align_backward_cuda( + 
const at::Tensor& grad, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width, + int64_t sampling_ratio, + bool aligned); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index 8d411b9c67e..1ec187c3348 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -4,28 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t sampling_ratio, - bool aligned); - -VISION_API at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width, - int64_t sampling_ratio, - bool aligned); - VISION_API std::tuple ROIPool_forward_cuda( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/roi_align.cpp similarity index 94% rename from torchvision/csrc/ROIAlign.h rename to torchvision/csrc/roi_align.cpp index 708981f061e..30eda8612d2 100644 --- a/torchvision/csrc/ROIAlign.h +++ b/torchvision/csrc/roi_align.cpp @@ -1,19 +1,10 @@ -#pragma once +#include "roi_align.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include #endif -// TODO: put this stuff in torchvision namespace - -// roi_align dispatch nexus at::Tensor roi_align( const at::Tensor& input, // Input feature map. const at::Tensor& rois, // List of ROIs to pool over. 
@@ -39,7 +30,7 @@ at::Tensor roi_align( } #if defined(WITH_CUDA) || defined(WITH_HIP) -at::Tensor ROIAlign_autocast( +at::Tensor roi_align_autocast( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -90,6 +81,8 @@ at::Tensor _roi_align_backward( aligned); } +namespace { + class ROIAlignFunction : public torch::autograd::Function { public: static torch::autograd::variable_list forward( @@ -189,7 +182,9 @@ class ROIAlignBackwardFunction } }; -at::Tensor ROIAlign_autograd( +} // namespace + +at::Tensor roi_align_autograd( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -207,7 +202,7 @@ at::Tensor ROIAlign_autograd( aligned)[0]; } -at::Tensor ROIAlign_backward_autograd( +at::Tensor roi_align_backward_autograd( const at::Tensor& grad, const at::Tensor& rois, double spatial_scale, diff --git a/torchvision/csrc/roi_align.h b/torchvision/csrc/roi_align.h new file mode 100644 index 00000000000..d9bae4ba2a1 --- /dev/null +++ b/torchvision/csrc/roi_align.h @@ -0,0 +1,69 @@ +#pragma once + +#include "cpu/roi_align_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/roi_align_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/roi_align_kernel.h" +#endif + +// C++ Forward +at::Tensor roi_align( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + bool aligned); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor roi_align_autocast( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + bool aligned); +#endif + +// C++ Backward +at::Tensor _roi_align_backward( + const at::Tensor& grad, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width, + int64_t sampling_ratio, + bool aligned); + +// Autograd Forward and Backward +at::Tensor roi_align_autograd( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t sampling_ratio, + bool aligned); + +at::Tensor roi_align_backward_autograd( + const at::Tensor& grad, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width, + int64_t sampling_ratio, + bool aligned); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index 6f540c6832e..c41663f0736 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -8,13 +8,13 @@ #include #endif -#include "ROIAlign.h" #include "ROIPool.h" #include "deform_conv2d.h" #include "empty_tensor_op.h" #include "nms.h" #include "ps_roi_align.h" #include "ps_roi_pool.h" +#include "roi_align.h" // If we are in a Windows environment, we need to define // initialization functions for the _custom_ops extension @@ -69,8 +69,8 @@ TORCH_LIBRARY_IMPL(torchvision, CPU, m) { m.impl("_ps_roi_align_backward", ps_roi_align_backward_cpu); m.impl("ps_roi_pool", ps_roi_pool_forward_cpu); m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cpu); - m.impl("roi_align", ROIAlign_forward_cpu); - m.impl("_roi_align_backward", ROIAlign_backward_cpu); + m.impl("roi_align", roi_align_forward_cpu); + m.impl("_roi_align_backward", roi_align_backward_cpu); m.impl("roi_pool", ROIPool_forward_cpu); m.impl("_roi_pool_backward", ROIPool_backward_cpu); 
} @@ -85,8 +85,8 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { m.impl("_ps_roi_align_backward", ps_roi_align_backward_cuda); m.impl("ps_roi_pool", ps_roi_pool_forward_cuda); m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cuda); - m.impl("roi_align", ROIAlign_forward_cuda); - m.impl("_roi_align_backward", ROIAlign_backward_cuda); + m.impl("roi_align", roi_align_forward_cuda); + m.impl("_roi_align_backward", roi_align_backward_cuda); m.impl("roi_pool", ROIPool_forward_cuda); m.impl("_roi_pool_backward", ROIPool_backward_cuda); } @@ -99,7 +99,7 @@ TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { m.impl("nms", nms_autocast); m.impl("ps_roi_align", ps_roi_align_autocast); m.impl("ps_roi_pool", ps_roi_pool_autocast); - m.impl("roi_align", ROIAlign_autocast); + m.impl("roi_align", roi_align_autocast); m.impl("roi_pool", ROIPool_autocast); } #endif @@ -111,8 +111,8 @@ TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { m.impl("_ps_roi_align_backward", ps_roi_align_backward_autograd); m.impl("ps_roi_pool", ps_roi_pool_autograd); m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_autograd); - m.impl("roi_align", ROIAlign_autograd); - m.impl("_roi_align_backward", ROIAlign_backward_autograd); + m.impl("roi_align", roi_align_autograd); + m.impl("_roi_align_backward", roi_align_backward_autograd); m.impl("roi_pool", ROIPool_autograd); m.impl("_roi_pool_backward", ROIPool_backward_autograd); } From 3c3c625f144cbbe936ad33252af78349e68f7cca Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Dec 2020 01:21:10 +0000 Subject: [PATCH 6/9] Encapsulate and standardize roi_pool (#3088) * Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API. * Adding all internal functions in anonymous namespaces. * Syncing variable names between the cpp files and their header files. * Renaming C++/CUDA kernel files and moving operator code from header to cpp file. * Create foreach cpp file a separate header file with "public" functions. * Removing unnecessary repeated includes. 
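Note on registration: once the forward, backward, autocast and autograd variants share a consistent naming scheme, wiring an operator into the library is a single m.impl(...) line per dispatch key, as the vision.cpp hunks in these patches show. The sketch below illustrates that registration shape for a hypothetical torchvision::my_op; the schema string and function names are invented, and the forward declarations stand in for the per-device kernel headers.

#include <ATen/ATen.h>
#include <torch/library.h>

// Stand-ins for the declarations normally pulled in from
// cpu/my_op_kernel.h, cuda/my_op_kernel.h and my_op.h (all hypothetical).
at::Tensor my_op_cpu(const at::Tensor& input, double scale);
at::Tensor my_op_cuda(const at::Tensor& input, double scale);
at::Tensor my_op_autocast(const at::Tensor& input, double scale);
at::Tensor my_op_autograd(const at::Tensor& input, double scale);

TORCH_LIBRARY(torchvision, m) {
  // Schema only; kernels are attached per dispatch key below.
  m.def("my_op(Tensor input, float scale) -> Tensor");
}

TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
  m.impl("my_op", my_op_cpu);
}

#if defined(WITH_CUDA) || defined(WITH_HIP)
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
  m.impl("my_op", my_op_cuda);
}

TORCH_LIBRARY_IMPL(torchvision, Autocast, m) {
  m.impl("my_op", my_op_autocast);
}
#endif

TORCH_LIBRARY_IMPL(torchvision, Autograd, m) {
  // In the real files this binds a wrapper around a torch::autograd::Function.
  m.impl("my_op", my_op_autograd);
}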
--- .../{ROIPool_cpu.cpp => roi_pool_kernel.cpp} | 31 +++++----- torchvision/csrc/cpu/roi_pool_kernel.h | 23 +++++++ torchvision/csrc/cpu/vision_cpu.h | 19 ------ .../{ROIPool_cuda.cu => roi_pool_kernel.cu} | 28 +++++---- torchvision/csrc/cuda/roi_pool_kernel.h | 23 +++++++ torchvision/csrc/cuda/vision_cuda.h | 19 ------ torchvision/csrc/{ROIPool.h => roi_pool.cpp} | 26 ++++---- torchvision/csrc/roi_pool.h | 61 +++++++++++++++++++ torchvision/csrc/vision.cpp | 16 ++--- 9 files changed, 159 insertions(+), 87 deletions(-) rename torchvision/csrc/cpu/{ROIPool_cpu.cpp => roi_pool_kernel.cpp} (92%) create mode 100644 torchvision/csrc/cpu/roi_pool_kernel.h rename torchvision/csrc/cuda/{ROIPool_cuda.cu => roi_pool_kernel.cu} (91%) create mode 100644 torchvision/csrc/cuda/roi_pool_kernel.h rename torchvision/csrc/{ROIPool.h => roi_pool.cpp} (93%) create mode 100644 torchvision/csrc/roi_pool.h diff --git a/torchvision/csrc/cpu/ROIPool_cpu.cpp b/torchvision/csrc/cpu/roi_pool_kernel.cpp similarity index 92% rename from torchvision/csrc/cpu/ROIPool_cpu.cpp rename to torchvision/csrc/cpu/roi_pool_kernel.cpp index 34da4f1d1cc..389e9c90248 100644 --- a/torchvision/csrc/cpu/ROIPool_cpu.cpp +++ b/torchvision/csrc/cpu/roi_pool_kernel.cpp @@ -1,7 +1,8 @@ -#include -#include -#include -#include +#include + +#include "roi_pool_kernel.h" + +namespace { template inline void add(T* address, const T& val) { @@ -9,7 +10,7 @@ inline void add(T* address, const T& val) { } template -void RoIPoolForward( +void roi_pool_forward_kernel_impl( const T* input, const T spatial_scale, int channels, @@ -78,7 +79,7 @@ void RoIPoolForward( } template -void RoIPoolBackward( +void roi_pool_backward_kernel_impl( const T* grad_output, const int* argmax_data, int num_rois, @@ -120,7 +121,9 @@ void RoIPoolBackward( } // num_rois } -std::tuple ROIPool_forward_cpu( +} // namespace + +std::tuple roi_pool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -131,7 +134,7 @@ std::tuple ROIPool_forward_cpu( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIPool_forward_cpu"; + at::CheckedFrom c = "roi_pool_forward_cpu"; at::checkAllSameType(c, {input_t, rois_t}); int num_rois = rois.size(0); @@ -151,8 +154,8 @@ std::tuple ROIPool_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIPool_forward", [&] { - RoIPoolForward( + input.scalar_type(), "roi_pool_forward", [&] { + roi_pool_forward_kernel_impl( input_.data_ptr(), spatial_scale, channels, @@ -168,7 +171,7 @@ std::tuple ROIPool_forward_cpu( return std::make_tuple(output, argmax); } -at::Tensor ROIPool_backward_cpu( +at::Tensor roi_pool_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& argmax, @@ -188,7 +191,7 @@ at::Tensor ROIPool_backward_cpu( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIPool_backward_cpu"; + at::CheckedFrom c = "roi_pool_backward_cpu"; at::checkAllSameType(c, {grad_t, rois_t}); auto num_rois = rois.size(0); @@ -209,8 +212,8 @@ at::Tensor ROIPool_backward_cpu( auto rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIPool_backward", [&] { - RoIPoolBackward( + grad.scalar_type(), "roi_pool_backward", [&] { + roi_pool_backward_kernel_impl( grad.data_ptr(), argmax.data_ptr(), num_rois, diff --git a/torchvision/csrc/cpu/roi_pool_kernel.h b/torchvision/csrc/cpu/roi_pool_kernel.h new file mode 100644 index 
00000000000..66fd993d5b4 --- /dev/null +++ b/torchvision/csrc/cpu/roi_pool_kernel.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple roi_pool_forward_cpu( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +VISION_API at::Tensor roi_pool_backward_cpu( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& argmax, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index a2647c57aa5..a772fa13f01 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -3,22 +3,3 @@ #include "../macros.h" // TODO: Delete this file once all the methods are gone - -VISION_API std::tuple ROIPool_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width); - -VISION_API at::Tensor ROIPool_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - double spatial_scale, - int64_t pooled_height, - int64_t pooled_width, - int64_t batch_size, - int64_t channels, - int64_t height, - int64_t width); diff --git a/torchvision/csrc/cuda/ROIPool_cuda.cu b/torchvision/csrc/cuda/roi_pool_kernel.cu similarity index 91% rename from torchvision/csrc/cuda/ROIPool_cuda.cu rename to torchvision/csrc/cuda/roi_pool_kernel.cu index 3131b9eea7e..c10dd0cf403 100644 --- a/torchvision/csrc/cuda/ROIPool_cuda.cu +++ b/torchvision/csrc/cuda/roi_pool_kernel.cu @@ -1,13 +1,15 @@ -#include -#include #include #include +#include #include #include "cuda_helpers.h" +#include "roi_pool_kernel.h" + +namespace { template -__global__ void RoIPoolForward( +__global__ void roi_pool_forward_kernel_impl( int nthreads, const T* input, const T spatial_scale, @@ -72,7 +74,7 @@ __global__ void RoIPoolForward( } template -__global__ void RoIPoolBackward( +__global__ void roi_pool_backward_kernel_impl( int nthreads, const T* grad_output, const int* argmax_data, @@ -115,7 +117,9 @@ __global__ void RoIPoolBackward( } } -std::tuple ROIPool_forward_cuda( +} // namespace + +std::tuple roi_pool_forward_cuda( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -128,7 +132,7 @@ std::tuple ROIPool_forward_cuda( at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIPool_forward_cuda"; + at::CheckedFrom c = "roi_pool_forward_cuda"; at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t}); @@ -160,8 +164,8 @@ std::tuple ROIPool_forward_cuda( auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "ROIPool_forward", [&] { - RoIPoolForward<<>>( + AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_pool_forward", [&] { + roi_pool_forward_kernel_impl<<>>( output_size, input_.data_ptr(), spatial_scale, @@ -178,7 +182,7 @@ std::tuple ROIPool_forward_cuda( return std::make_tuple(output, argmax); } -at::Tensor ROIPool_backward_cuda( +at::Tensor roi_pool_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& argmax, @@ -197,7 +201,7 @@ at::Tensor ROIPool_backward_cuda( at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, argmax_t{argmax, "argmax", 3}; - at::CheckedFrom c = "ROIPool_backward_cuda"; + at::CheckedFrom c = 
"roi_pool_backward_cuda"; at::checkAllSameGPU(c, {grad_t, rois_t, argmax_t}); at::checkAllSameType(c, {grad_t, rois_t}); @@ -228,8 +232,8 @@ at::Tensor ROIPool_backward_cuda( auto argmax_ = argmax.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "ROIPool_backward", [&] { - RoIPoolBackward<<>>( + AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_pool_backward", [&] { + roi_pool_backward_kernel_impl<<>>( grad.numel(), grad.data_ptr(), argmax_.data_ptr(), diff --git a/torchvision/csrc/cuda/roi_pool_kernel.h b/torchvision/csrc/cuda/roi_pool_kernel.h new file mode 100644 index 00000000000..3a99f7521bd --- /dev/null +++ b/torchvision/csrc/cuda/roi_pool_kernel.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API std::tuple roi_pool_forward_cuda( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +VISION_API at::Tensor roi_pool_backward_cuda( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& argmax, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index 1ec187c3348..a772fa13f01 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -3,22 +3,3 @@ #include "../macros.h" // TODO: Delete this file once all the methods are gone - -VISION_API std::tuple ROIPool_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width); - -VISION_API at::Tensor ROIPool_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t batch_size, - const int64_t channels, - const int64_t height, - const int64_t width); diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/roi_pool.cpp similarity index 93% rename from torchvision/csrc/ROIPool.h rename to torchvision/csrc/roi_pool.cpp index 7950005f1bd..c8d70bd8940 100644 --- a/torchvision/csrc/ROIPool.h +++ b/torchvision/csrc/roi_pool.cpp @@ -1,18 +1,10 @@ -#pragma once +#include "roi_pool.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include #endif -// TODO: put this stuff in torchvision namespace - std::tuple roi_pool( const at::Tensor& input, const at::Tensor& rois, @@ -26,7 +18,7 @@ std::tuple roi_pool( } #if defined(WITH_CUDA) || defined(WITH_HIP) -std::tuple ROIPool_autocast( +std::tuple roi_pool_autocast( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -73,6 +65,8 @@ at::Tensor _roi_pool_backward( width); } +namespace { + class ROIPoolFunction : public torch::autograd::Function { public: static torch::autograd::variable_list forward( @@ -165,7 +159,9 @@ class ROIPoolBackwardFunction } }; -std::tuple ROIPool_autograd( +} // namespace + +std::tuple roi_pool_autograd( const at::Tensor& input, const at::Tensor& rois, double spatial_scale, @@ -177,7 +173,7 @@ std::tuple ROIPool_autograd( return std::make_tuple(result[0], result[1]); } -at::Tensor ROIPool_backward_autograd( +at::Tensor 
roi_pool_backward_autograd( const at::Tensor& grad, const at::Tensor& rois, const at::Tensor& argmax, diff --git a/torchvision/csrc/roi_pool.h b/torchvision/csrc/roi_pool.h new file mode 100644 index 00000000000..f528ce6d7e0 --- /dev/null +++ b/torchvision/csrc/roi_pool.h @@ -0,0 +1,61 @@ +#pragma once + +#include "cpu/roi_pool_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/roi_pool_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/roi_pool_kernel.h" +#endif + +// C++ Forward +std::tuple roi_pool( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +std::tuple roi_pool_autocast( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); +#endif + +// C++ Backward +at::Tensor _roi_pool_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& argmax, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); + +// Autograd Forward and Backward +std::tuple roi_pool_autograd( + const at::Tensor& input, + const at::Tensor& rois, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width); + +at::Tensor roi_pool_backward_autograd( + const at::Tensor& grad, + const at::Tensor& rois, + const at::Tensor& argmax, + double spatial_scale, + int64_t pooled_height, + int64_t pooled_width, + int64_t batch_size, + int64_t channels, + int64_t height, + int64_t width); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index c41663f0736..d764ec9334b 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -8,13 +8,13 @@ #include #endif -#include "ROIPool.h" #include "deform_conv2d.h" #include "empty_tensor_op.h" #include "nms.h" #include "ps_roi_align.h" #include "ps_roi_pool.h" #include "roi_align.h" +#include "roi_pool.h" // If we are in a Windows environment, we need to define // initialization functions for the _custom_ops extension @@ -71,8 +71,8 @@ TORCH_LIBRARY_IMPL(torchvision, CPU, m) { m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cpu); m.impl("roi_align", roi_align_forward_cpu); m.impl("_roi_align_backward", roi_align_backward_cpu); - m.impl("roi_pool", ROIPool_forward_cpu); - m.impl("_roi_pool_backward", ROIPool_backward_cpu); + m.impl("roi_pool", roi_pool_forward_cpu); + m.impl("_roi_pool_backward", roi_pool_backward_cpu); } // TODO: Place this in a hypothetical separate torchvision_cuda library @@ -87,8 +87,8 @@ TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cuda); m.impl("roi_align", roi_align_forward_cuda); m.impl("_roi_align_backward", roi_align_backward_cuda); - m.impl("roi_pool", ROIPool_forward_cuda); - m.impl("_roi_pool_backward", ROIPool_backward_cuda); + m.impl("roi_pool", roi_pool_forward_cuda); + m.impl("_roi_pool_backward", roi_pool_backward_cuda); } #endif @@ -100,7 +100,7 @@ TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { m.impl("ps_roi_align", ps_roi_align_autocast); m.impl("ps_roi_pool", ps_roi_pool_autocast); m.impl("roi_align", roi_align_autocast); - m.impl("roi_pool", ROIPool_autocast); + m.impl("roi_pool", roi_pool_autocast); } #endif @@ -113,6 +113,6 @@ TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_autograd); m.impl("roi_align", roi_align_autograd); m.impl("_roi_align_backward", 
roi_align_backward_autograd); - m.impl("roi_pool", ROIPool_autograd); - m.impl("_roi_pool_backward", ROIPool_backward_autograd); + m.impl("roi_pool", roi_pool_autograd); + m.impl("_roi_pool_backward", roi_pool_backward_autograd); } From 651bfbd269358aa8f9a2fdf5eabdb232321e91ba Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Dec 2020 08:58:28 +0000 Subject: [PATCH 7/9] Encapsulate and standardize new_empty_tensor_op (#3089) * Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API. * Create foreach cpp file a separate header file with "public" functions. * Adding all internal functions in anonymous namespaces. * Convert to const ref all possible parameters. * Removing unnecessary repeated includes. --- ...pty_tensor_op.h => new_empty_tensor_op.cpp} | 18 ++++++++++++------ torchvision/csrc/new_empty_tensor_op.h | 7 +++++++ torchvision/csrc/vision.cpp | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) rename torchvision/csrc/{empty_tensor_op.h => new_empty_tensor_op.cpp} (67%) create mode 100644 torchvision/csrc/new_empty_tensor_op.h diff --git a/torchvision/csrc/empty_tensor_op.h b/torchvision/csrc/new_empty_tensor_op.cpp similarity index 67% rename from torchvision/csrc/empty_tensor_op.h rename to torchvision/csrc/new_empty_tensor_op.cpp index 99448109762..e4f31600c54 100644 --- a/torchvision/csrc/empty_tensor_op.h +++ b/torchvision/csrc/new_empty_tensor_op.cpp @@ -1,14 +1,16 @@ #pragma once -// All pure C++ headers for the C++ frontend. -#include +#include "new_empty_tensor_op.h" +#include + +namespace { class NewEmptyTensorOp : public torch::autograd::Function { public: static torch::autograd::variable_list forward( torch::autograd::AutogradContext* ctx, - torch::autograd::Variable input, - c10::List new_shape) { + const torch::autograd::Variable& input, + const c10::List& new_shape) { ctx->saved_data["shape"] = input.sizes(); std::vector shape(new_shape.begin(), new_shape.end()); return {input.new_empty(shape, at::TensorOptions())}; @@ -16,7 +18,7 @@ class NewEmptyTensorOp : public torch::autograd::Function { static torch::autograd::variable_list backward( torch::autograd::AutogradContext* ctx, - torch::autograd::variable_list grad_output) { + const torch::autograd::variable_list& grad_output) { // Use data saved in forward auto shape = ctx->saved_data["shape"].toIntList(); auto out = forward(ctx, grad_output[0], shape); @@ -24,6 +26,10 @@ class NewEmptyTensorOp : public torch::autograd::Function { } }; -at::Tensor new_empty_tensor(const at::Tensor& input, c10::List shape) { +} // namespace + +at::Tensor new_empty_tensor( + const at::Tensor& input, + const c10::List& shape) { return NewEmptyTensorOp::apply(input, shape)[0]; } diff --git a/torchvision/csrc/new_empty_tensor_op.h b/torchvision/csrc/new_empty_tensor_op.h new file mode 100644 index 00000000000..75f4cd5a7fe --- /dev/null +++ b/torchvision/csrc/new_empty_tensor_op.h @@ -0,0 +1,7 @@ +#pragma once + +#include + +at::Tensor new_empty_tensor( + const at::Tensor& input, + const c10::List& shape); diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index d764ec9334b..fb0bf014912 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -9,7 +9,7 @@ #endif #include "deform_conv2d.h" -#include "empty_tensor_op.h" +#include "new_empty_tensor_op.h" #include "nms.h" #include "ps_roi_align.h" #include "ps_roi_pool.h" From 2855e9193e46447d6c5858901a950299933ccf34 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Dec 
2020 11:05:49 +0000 Subject: [PATCH 8/9] Encapsulate and standardize C++ Ops - Clean up (#3094) * Removing unnecessary repeated includes. * Remove unnecessary vision_cpu.h, vision_cuda.h, autocast.h. * Fixing naming convention and correcting method names on macros. * Turn on clang formatter for cu files and fixing broken styles. * Replace "#ifndef ... #define ... #endif" with "#pragma once" on header files. --- setup.py | 4 +- test/tracing/frcnn/test_frcnn_tracing.cpp | 1 - torchvision/csrc/autocast.h | 7 -- torchvision/csrc/cpu/deform_conv2d_kernel.cpp | 4 +- torchvision/csrc/cpu/nms_kernel.cpp | 6 +- torchvision/csrc/cpu/ps_roi_align_kernel.cpp | 4 +- torchvision/csrc/cpu/ps_roi_pool_kernel.cpp | 4 +- torchvision/csrc/cpu/roi_align_kernel.cpp | 4 +- torchvision/csrc/cpu/roi_pool_kernel.cpp | 4 +- torchvision/csrc/cpu/video/register.cpp | 4 +- torchvision/csrc/cpu/vision_cpu.h | 5 -- torchvision/csrc/cuda/deform_conv2d_kernel.cu | 23 +++--- torchvision/csrc/cuda/nms_kernel.cu | 13 +-- torchvision/csrc/cuda/ps_roi_align_kernel.cu | 17 ++-- torchvision/csrc/cuda/ps_roi_pool_kernel.cu | 17 ++-- torchvision/csrc/cuda/roi_align_kernel.cu | 80 +++++++++---------- torchvision/csrc/cuda/roi_pool_kernel.cu | 78 +++++++++--------- torchvision/csrc/cuda/vision_cuda.h | 5 -- torchvision/csrc/macros.h | 5 +- torchvision/csrc/models/alexnet.h | 5 +- torchvision/csrc/models/densenet.h | 5 +- torchvision/csrc/models/general.h | 5 +- torchvision/csrc/models/googlenet.h | 5 +- torchvision/csrc/models/inception.h | 5 +- torchvision/csrc/models/mnasnet.h | 5 +- torchvision/csrc/models/mobilenet.h | 5 +- torchvision/csrc/models/models.h | 5 +- torchvision/csrc/models/modelsimpl.h | 5 +- torchvision/csrc/models/resnet.h | 5 +- torchvision/csrc/models/shufflenetv2.h | 5 +- torchvision/csrc/models/squeezenet.h | 5 +- torchvision/csrc/models/vgg.h | 5 +- torchvision/csrc/vision.h | 5 +- .../run-clang-format/run-clang-format.py | 2 +- 34 files changed, 141 insertions(+), 216 deletions(-) delete mode 100644 torchvision/csrc/autocast.h delete mode 100644 torchvision/csrc/cpu/vision_cpu.h delete mode 100644 torchvision/csrc/cuda/vision_cuda.h diff --git a/setup.py b/setup.py index 82c93be87cd..0a363794da5 100644 --- a/setup.py +++ b/setup.py @@ -152,8 +152,8 @@ def get_extensions(): ) source_cuda = glob.glob(os.path.join(extensions_dir, 'hip', '*.hip')) # Copy over additional files - shutil.copy("torchvision/csrc/cuda/cuda_helpers.h", "torchvision/csrc/hip/cuda_helpers.h") - shutil.copy("torchvision/csrc/cuda/vision_cuda.h", "torchvision/csrc/hip/vision_cuda.h") + for file in glob.glob(r"torchvision/csrc/cuda/*.h"): + shutil.copy(file, "torchvision/csrc/hip") else: source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu')) diff --git a/test/tracing/frcnn/test_frcnn_tracing.cpp b/test/tracing/frcnn/test_frcnn_tracing.cpp index 95b3a1b5726..90476b24f4b 100644 --- a/test/tracing/frcnn/test_frcnn_tracing.cpp +++ b/test/tracing/frcnn/test_frcnn_tracing.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #ifdef _WIN32 diff --git a/torchvision/csrc/autocast.h b/torchvision/csrc/autocast.h deleted file mode 100644 index 584ef13f389..00000000000 --- a/torchvision/csrc/autocast.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -// TODO: Delete this file once none of the methods use it - -#if defined(WITH_CUDA) || defined(WITH_HIP) -#include -#endif diff --git a/torchvision/csrc/cpu/deform_conv2d_kernel.cpp b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp index f593e880b3b..5cac99db04a 100644 --- 
a/torchvision/csrc/cpu/deform_conv2d_kernel.cpp +++ b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp @@ -350,7 +350,7 @@ void compute_grad_input( channels * weight_h * weight_w * out_h * out_w * parallel_imgs; AT_DISPATCH_FLOATING_TYPES_AND_HALF( - columns.scalar_type(), "deformable_col2im", ([&] { + columns.scalar_type(), "compute_grad_input", ([&] { deformable_col2im_kernel( num_kernels, columns.data_ptr(), @@ -551,7 +551,7 @@ void compute_grad_offset_and_mask( out_h * out_w * 2 * weight_h * weight_w * n_offset_grps * parallel_imgs; AT_DISPATCH_FLOATING_TYPES_AND_HALF( - columns.scalar_type(), "deformable_col2im_coord", ([&] { + columns.scalar_type(), "compute_grad_offset_and_mask", ([&] { deformable_col2im_coord_kernel( num_kernels, columns.data_ptr(), diff --git a/torchvision/csrc/cpu/nms_kernel.cpp b/torchvision/csrc/cpu/nms_kernel.cpp index 036a91f56dc..52953a9e822 100644 --- a/torchvision/csrc/cpu/nms_kernel.cpp +++ b/torchvision/csrc/cpu/nms_kernel.cpp @@ -3,7 +3,7 @@ namespace { template -at::Tensor nms_kernel( +at::Tensor nms_kernel_impl( const at::Tensor& dets, const at::Tensor& scores, double iou_threshold) { @@ -98,8 +98,8 @@ at::Tensor nms_cpu( auto result = at::empty({0}, dets.options()); - AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { - result = nms_kernel(dets, scores, iou_threshold); + AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_cpu", [&] { + result = nms_kernel_impl(dets, scores, iou_threshold); }); return result; } diff --git a/torchvision/csrc/cpu/ps_roi_align_kernel.cpp b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp index a56fbe58e9a..3d6c95f02ea 100644 --- a/torchvision/csrc/cpu/ps_roi_align_kernel.cpp +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp @@ -340,7 +340,7 @@ std::tuple ps_roi_align_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ps_roi_align_forward", [&] { + input.scalar_type(), "ps_roi_align_forward_cpu", [&] { ps_roi_align_forward_kernel_impl( output_size, input_.data_ptr(), @@ -397,7 +397,7 @@ at::Tensor ps_roi_align_backward_cpu( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ps_roi_align_backward", [&] { + grad.scalar_type(), "ps_roi_align_backward_cpu", [&] { ps_roi_align_backward_kernel_impl( grad.numel(), grad_.data_ptr(), diff --git a/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp index 171de9edc6a..cdee9b9f55c 100644 --- a/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp +++ b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp @@ -183,7 +183,7 @@ std::tuple ps_roi_pool_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ps_roi_pool_forward", [&] { + input.scalar_type(), "ps_roi_pool_forward_cpu", [&] { ps_roi_pool_forward_kernel_impl( input_.data_ptr(), spatial_scale, @@ -238,7 +238,7 @@ at::Tensor ps_roi_pool_backward_cpu( auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ps_roi_pool_backward", [&] { + grad.scalar_type(), "ps_roi_pool_backward_cpu", [&] { ps_roi_pool_backward_kernel_impl( grad_.data_ptr(), channel_mapping.data_ptr(), diff --git a/torchvision/csrc/cpu/roi_align_kernel.cpp b/torchvision/csrc/cpu/roi_align_kernel.cpp index 01d2bca25a3..133722fdc5e 100644 --- a/torchvision/csrc/cpu/roi_align_kernel.cpp +++ b/torchvision/csrc/cpu/roi_align_kernel.cpp @@ -419,7 +419,7 @@ 
at::Tensor roi_align_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "roi_align_forward", [&] { + input.scalar_type(), "roi_align_forward_cpu", [&] { roi_align_forward_kernel_impl( output_size, input_.data_ptr(), @@ -473,7 +473,7 @@ at::Tensor roi_align_backward_cpu( auto rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "roi_align_forward", [&] { + grad.scalar_type(), "roi_align_backward_cpu", [&] { roi_align_backward_kernel_impl( grad.numel(), grad.data_ptr(), diff --git a/torchvision/csrc/cpu/roi_pool_kernel.cpp b/torchvision/csrc/cpu/roi_pool_kernel.cpp index 389e9c90248..d622f2b430b 100644 --- a/torchvision/csrc/cpu/roi_pool_kernel.cpp +++ b/torchvision/csrc/cpu/roi_pool_kernel.cpp @@ -154,7 +154,7 @@ std::tuple roi_pool_forward_cpu( auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "roi_pool_forward", [&] { + input.scalar_type(), "roi_pool_forward_cpu", [&] { roi_pool_forward_kernel_impl( input_.data_ptr(), spatial_scale, @@ -212,7 +212,7 @@ at::Tensor roi_pool_backward_cpu( auto rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "roi_pool_backward", [&] { + grad.scalar_type(), "roi_pool_backward_cpu", [&] { roi_pool_backward_kernel_impl( grad.data_ptr(), argmax.data_ptr(), diff --git a/torchvision/csrc/cpu/video/register.cpp b/torchvision/csrc/cpu/video/register.cpp index a88615987bf..9d538444f3f 100644 --- a/torchvision/csrc/cpu/video/register.cpp +++ b/torchvision/csrc/cpu/video/register.cpp @@ -1,5 +1,4 @@ -#ifndef REGISTER_H -#define REGISTER_H +#pragma once #include "Video.h" @@ -15,4 +14,3 @@ static auto registerVideo = .def("next", &Video::Next); } // namespace -#endif diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h deleted file mode 100644 index a772fa13f01..00000000000 --- a/torchvision/csrc/cpu/vision_cpu.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once -#include -#include "../macros.h" - -// TODO: Delete this file once all the methods are gone diff --git a/torchvision/csrc/cuda/deform_conv2d_kernel.cu b/torchvision/csrc/cuda/deform_conv2d_kernel.cu index 6edaa9c73af..cef8124caf3 100644 --- a/torchvision/csrc/cuda/deform_conv2d_kernel.cu +++ b/torchvision/csrc/cuda/deform_conv2d_kernel.cu @@ -66,7 +66,6 @@ // modified from // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp -#include #include #include #include @@ -88,7 +87,9 @@ inline unsigned int GET_THREADS() { return 512; } -inline unsigned int GET_BLOCKS(const unsigned int THREADS, const unsigned int N) { +inline unsigned int GET_BLOCKS( + const unsigned int THREADS, + const unsigned int N) { unsigned int kMaxGridNum = at::cuda::getCurrentDeviceProperties()->maxGridSize[0]; return std::min(kMaxGridNum, (N + THREADS - 1) / THREADS); @@ -235,10 +236,8 @@ void deformable_im2col( const unsigned int blocks = GET_BLOCKS(threads, num_kernels); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "deformable_im2col_gpu", ([&] { - deformable_im2col_kernel<<< - blocks, - threads>>>( + input.scalar_type(), "deformable_im2col", ([&] { + deformable_im2col_kernel<<>>( num_kernels, input.data_ptr(), data_offset.data_ptr(), @@ -381,10 +380,8 @@ void compute_grad_input( const unsigned int blocks = GET_BLOCKS(threads, num_kernels); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - columns.scalar_type(), 
"deformable_col2im_gpu", ([&] { - deformable_col2im_kernel<<< - blocks, - threads>>>( + columns.scalar_type(), "compute_grad_input", ([&] { + deformable_col2im_kernel<<>>( num_kernels, columns.data_ptr(), offset.data_ptr(), @@ -589,10 +586,8 @@ void compute_grad_offset_and_mask( const unsigned int blocks = GET_BLOCKS(threads, num_kernels); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - columns.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - deformable_col2im_coord_kernel<<< - blocks, - threads>>>( + columns.scalar_type(), "compute_grad_offset_and_mask", ([&] { + deformable_col2im_coord_kernel<<>>( num_kernels, columns.data_ptr(), input.data_ptr(), diff --git a/torchvision/csrc/cuda/nms_kernel.cu b/torchvision/csrc/cuda/nms_kernel.cu index 8785bd84897..ae244efebe7 100644 --- a/torchvision/csrc/cuda/nms_kernel.cu +++ b/torchvision/csrc/cuda/nms_kernel.cu @@ -1,4 +1,3 @@ -#include #include #include @@ -24,7 +23,7 @@ __device__ inline bool devIoU( } template -__global__ void nms_kernel( +__global__ void nms_kernel_impl( int n_boxes, double iou_threshold, const T* dev_boxes, @@ -74,7 +73,8 @@ __global__ void nms_kernel( } // namespace -at::Tensor nms_cuda(const at::Tensor& dets, +at::Tensor nms_cuda( + const at::Tensor& dets, const at::Tensor& scores, double iou_threshold) { TORCH_CHECK(dets.is_cuda(), "dets must be a CUDA tensor"); @@ -124,8 +124,8 @@ at::Tensor nms_cuda(const at::Tensor& dets, cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - dets_sorted.scalar_type(), "nms_kernel_cuda", [&] { - nms_kernel<<>>( + dets_sorted.scalar_type(), "nms_cuda", [&] { + nms_kernel_impl<<>>( dets_num, iou_threshold, dets_sorted.data_ptr(), @@ -133,7 +133,8 @@ at::Tensor nms_cuda(const at::Tensor& dets, }); at::Tensor mask_cpu = mask.to(at::kCPU); - unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr(); + unsigned long long* mask_host = + (unsigned long long*)mask_cpu.data_ptr(); std::vector remv(col_blocks); memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); diff --git a/torchvision/csrc/cuda/ps_roi_align_kernel.cu b/torchvision/csrc/cuda/ps_roi_align_kernel.cu index 4ac0c28de4c..7c808580258 100644 --- a/torchvision/csrc/cuda/ps_roi_align_kernel.cu +++ b/torchvision/csrc/cuda/ps_roi_align_kernel.cu @@ -339,14 +339,13 @@ std::tuple ps_roi_align_forward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(output_size), static_cast(512)), + ceil_div(static_cast(output_size), static_cast(512)), static_cast(4096))); dim3 block(512); - auto input_ = input.contiguous(), - rois_ = rois.contiguous(); + auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ps_roi_align_forward", [&] { + input.scalar_type(), "ps_roi_align_forward_cuda", [&] { ps_roi_align_forward_kernel_impl<<>>( output_size, input_.data_ptr(), @@ -383,8 +382,7 @@ at::Tensor ps_roi_align_backward_cuda( TORCH_CHECK(grad.is_cuda(), "grad must be a CUDA tensor"); TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor"); TORCH_CHECK( - channel_mapping.is_cuda(), - "channel_mapping must be a CUDA tensor"); + channel_mapping.is_cuda(), "channel_mapping must be a CUDA tensor"); at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; @@ -402,7 +400,7 @@ at::Tensor ps_roi_align_backward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - 
ceil_div(static_cast(grad.numel()), static_cast(512)), + ceil_div(static_cast(grad.numel()), static_cast(512)), static_cast(4096))); dim3 block(512); @@ -414,10 +412,9 @@ at::Tensor ps_roi_align_backward_cuda( int channels_out = channels / (pooled_height * pooled_width); - auto grad_ = grad.contiguous(), - rois_ = rois.contiguous(); + auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ps_roi_align_backward", [&] { + grad.scalar_type(), "ps_roi_align_backward_cuda", [&] { ps_roi_align_backward_kernel_impl<<>>( grad.numel(), grad_.data_ptr(), diff --git a/torchvision/csrc/cuda/ps_roi_pool_kernel.cu b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu index aa1c834e059..ed0ed26484d 100644 --- a/torchvision/csrc/cuda/ps_roi_pool_kernel.cu +++ b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu @@ -179,14 +179,13 @@ std::tuple ps_roi_pool_forward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(output_size), static_cast(512)), + ceil_div(static_cast(output_size), static_cast(512)), static_cast(4096))); dim3 block(512); - auto input_ = input.contiguous(), - rois_ = rois.contiguous(); + auto input_ = input.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ps_roi_pool_forward", [&] { + input.scalar_type(), "ps_roi_pool_forward_cuda", [&] { ps_roi_pool_forward_kernel_impl<<>>( output_size, input_.data_ptr(), @@ -220,8 +219,7 @@ at::Tensor ps_roi_pool_backward_cuda( TORCH_CHECK(grad.is_cuda(), "grad must be a CUDA tensor"); TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor"); TORCH_CHECK( - channel_mapping.is_cuda(), - "channel_mapping must be a CUDA tensor"); + channel_mapping.is_cuda(), "channel_mapping must be a CUDA tensor"); at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, channel_mapping_t{channel_mapping, "channel_mapping", 3}; @@ -239,7 +237,7 @@ at::Tensor ps_roi_pool_backward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(grad.numel()), static_cast(512)), + ceil_div(static_cast(grad.numel()), static_cast(512)), static_cast(4096))); dim3 block(512); @@ -251,10 +249,9 @@ at::Tensor ps_roi_pool_backward_cuda( int channels_out = channels / (pooled_height * pooled_width); - auto grad_ = grad.contiguous(), - rois_ = rois.contiguous(); + auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ps_roi_pool_backward", [&] { + grad.scalar_type(), "ps_roi_pool_backward_cuda", [&] { ps_roi_pool_backward_kernel_impl<<>>( grad.numel(), grad_.data_ptr(), diff --git a/torchvision/csrc/cuda/roi_align_kernel.cu b/torchvision/csrc/cuda/roi_align_kernel.cu index 7f763170a9e..195d8b067f4 100644 --- a/torchvision/csrc/cuda/roi_align_kernel.cu +++ b/torchvision/csrc/cuda/roi_align_kernel.cu @@ -323,8 +323,7 @@ at::Tensor roi_align_forward_cuda( bool aligned) { TORCH_CHECK(input.is_cuda(), "input must be a CUDA tensor"); TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor"); - TORCH_CHECK( - rois.size(1) == 5, "rois must have shape as Tensor[K, 5]"); + TORCH_CHECK(rois.size(1) == 5, "rois must have shape as Tensor[K, 5]"); at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; @@ -346,7 +345,7 @@ at::Tensor roi_align_forward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(output_size), static_cast(512)), + ceil_div(static_cast(output_size), 
static_cast(512)), static_cast(4096))); dim3 block(512); @@ -355,23 +354,23 @@ at::Tensor roi_align_forward_cuda( return output; } - auto input_ = input.contiguous(), - rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_align_forward", [&] { - roi_align_forward_kernel_impl<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - aligned, - rois_.data_ptr(), - output.data_ptr()); - }); + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "roi_align_forward_cuda", [&] { + roi_align_forward_kernel_impl<<>>( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + aligned, + rois_.data_ptr(), + output.data_ptr()); + }); AT_CUDA_CHECK(cudaGetLastError()); return output; } @@ -405,7 +404,7 @@ at::Tensor roi_align_backward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(grad.numel()), static_cast(512)), + ceil_div(static_cast(grad.numel()), static_cast(512)), static_cast(4096))); dim3 block(512); @@ -421,25 +420,26 @@ at::Tensor roi_align_backward_cuda( int w_stride = grad.stride(3); auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_align_backward", [&] { - roi_align_backward_kernel_impl<<>>( - grad.numel(), - grad.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - aligned, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride); - }); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "roi_align_backward_cuda", [&] { + roi_align_backward_kernel_impl<<>>( + grad.numel(), + grad.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + sampling_ratio, + aligned, + grad_input.data_ptr(), + rois_.data_ptr(), + n_stride, + c_stride, + h_stride, + w_stride); + }); AT_CUDA_CHECK(cudaGetLastError()); return grad_input; } diff --git a/torchvision/csrc/cuda/roi_pool_kernel.cu b/torchvision/csrc/cuda/roi_pool_kernel.cu index c10dd0cf403..782ecaf9eb3 100644 --- a/torchvision/csrc/cuda/roi_pool_kernel.cu +++ b/torchvision/csrc/cuda/roi_pool_kernel.cu @@ -153,7 +153,7 @@ std::tuple roi_pool_forward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(output_size), static_cast(512)), + ceil_div(static_cast(output_size), static_cast(512)), static_cast(4096))); dim3 block(512); @@ -162,22 +162,22 @@ std::tuple roi_pool_forward_cuda( return std::make_tuple(output, argmax); } - auto input_ = input.contiguous(), - rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_pool_forward", [&] { - roi_pool_forward_kernel_impl<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - rois_.data_ptr(), - output.data_ptr(), - argmax.data_ptr()); - }); + auto input_ = input.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "roi_pool_forward_cuda", [&] { + roi_pool_forward_kernel_impl<<>>( + output_size, + input_.data_ptr(), + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + rois_.data_ptr(), + output.data_ptr(), + argmax.data_ptr()); + }); AT_CUDA_CHECK(cudaGetLastError()); 
return std::make_tuple(output, argmax); } @@ -215,7 +215,7 @@ at::Tensor roi_pool_backward_cuda( cudaStream_t stream = at::cuda::getCurrentCUDAStream(); dim3 grid(std::min( - ceil_div(static_cast(grad.numel()), static_cast(512)), + ceil_div(static_cast(grad.numel()), static_cast(512)), static_cast(4096))); dim3 block(512); @@ -230,27 +230,27 @@ at::Tensor roi_pool_backward_cuda( int h_stride = grad.stride(2); int w_stride = grad.stride(3); - auto argmax_ = argmax.contiguous(), - rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_pool_backward", [&] { - roi_pool_backward_kernel_impl<<>>( - grad.numel(), - grad.data_ptr(), - argmax_.data_ptr(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride); - }); + auto argmax_ = argmax.contiguous(), rois_ = rois.contiguous(); + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + grad.scalar_type(), "roi_pool_backward_cuda", [&] { + roi_pool_backward_kernel_impl<<>>( + grad.numel(), + grad.data_ptr(), + argmax_.data_ptr(), + num_rois, + spatial_scale, + channels, + height, + width, + pooled_height, + pooled_width, + grad_input.data_ptr(), + rois_.data_ptr(), + n_stride, + c_stride, + h_stride, + w_stride); + }); AT_CUDA_CHECK(cudaGetLastError()); return grad_input; } diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h deleted file mode 100644 index a772fa13f01..00000000000 --- a/torchvision/csrc/cuda/vision_cuda.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once -#include -#include "../macros.h" - -// TODO: Delete this file once all the methods are gone diff --git a/torchvision/csrc/macros.h b/torchvision/csrc/macros.h index cb01005a022..559140a933a 100644 --- a/torchvision/csrc/macros.h +++ b/torchvision/csrc/macros.h @@ -1,5 +1,4 @@ -#ifndef TORCHVISION_MACROS_H -#define TORCHVISION_MACROS_H +#pragma once #ifdef _WIN32 #if defined(torchvision_EXPORTS) @@ -20,5 +19,3 @@ #define VISION_INLINE_VARIABLE __attribute__((weak)) #endif #endif - -#endif // TORCHVISION_MACROS_H diff --git a/torchvision/csrc/models/alexnet.h b/torchvision/csrc/models/alexnet.h index 673598d3a53..33ffe379a97 100644 --- a/torchvision/csrc/models/alexnet.h +++ b/torchvision/csrc/models/alexnet.h @@ -1,5 +1,4 @@ -#ifndef ALEXNET_H -#define ALEXNET_H +#pragma once #include #include "general.h" @@ -20,5 +19,3 @@ TORCH_MODULE(AlexNet); } // namespace models } // namespace vision - -#endif // ALEXNET_H diff --git a/torchvision/csrc/models/densenet.h b/torchvision/csrc/models/densenet.h index 731d0c7879f..22db45b719d 100644 --- a/torchvision/csrc/models/densenet.h +++ b/torchvision/csrc/models/densenet.h @@ -1,5 +1,4 @@ -#ifndef DENSENET_H -#define DENSENET_H +#pragma once #include #include "general.h" @@ -82,5 +81,3 @@ TORCH_MODULE(DenseNet161); } // namespace models } // namespace vision - -#endif // DENSENET_H diff --git a/torchvision/csrc/models/general.h b/torchvision/csrc/models/general.h index 1378a1c85d2..4463786b4bb 100644 --- a/torchvision/csrc/models/general.h +++ b/torchvision/csrc/models/general.h @@ -1,5 +1,4 @@ -#ifndef VISION_GENERAL_H -#define VISION_GENERAL_H +#pragma once #ifdef _WIN32 #if defined(torchvision_EXPORTS) @@ -10,5 +9,3 @@ #else #define VISION_API #endif - -#endif // VISION_GENERAL_H \ No newline at end of file diff --git a/torchvision/csrc/models/googlenet.h b/torchvision/csrc/models/googlenet.h index 34b0cf5077f..d5192c7623f 100644 --- 
a/torchvision/csrc/models/googlenet.h +++ b/torchvision/csrc/models/googlenet.h @@ -1,5 +1,4 @@ -#ifndef GOOGLENET_H -#define GOOGLENET_H +#pragma once #include #include "general.h" @@ -86,5 +85,3 @@ TORCH_MODULE(GoogLeNet); } // namespace models } // namespace vision - -#endif // GOOGLENET_H diff --git a/torchvision/csrc/models/inception.h b/torchvision/csrc/models/inception.h index 58f1a7c1054..3f964e3103c 100644 --- a/torchvision/csrc/models/inception.h +++ b/torchvision/csrc/models/inception.h @@ -1,5 +1,4 @@ -#ifndef INCEPTION_H -#define INCEPTION_H +#pragma once #include #include "general.h" @@ -124,5 +123,3 @@ TORCH_MODULE(InceptionV3); } // namespace models } // namespace vision - -#endif // INCEPTION_H diff --git a/torchvision/csrc/models/mnasnet.h b/torchvision/csrc/models/mnasnet.h index 6f45101f332..ae136cd5b30 100644 --- a/torchvision/csrc/models/mnasnet.h +++ b/torchvision/csrc/models/mnasnet.h @@ -1,5 +1,4 @@ -#ifndef MNASNET_H -#define MNASNET_H +#pragma once #include #include "general.h" @@ -43,5 +42,3 @@ TORCH_MODULE(MNASNet1_3); } // namespace models } // namespace vision - -#endif // MNASNET_H diff --git a/torchvision/csrc/models/mobilenet.h b/torchvision/csrc/models/mobilenet.h index 6af0a597e1f..7e3f8596692 100644 --- a/torchvision/csrc/models/mobilenet.h +++ b/torchvision/csrc/models/mobilenet.h @@ -1,5 +1,4 @@ -#ifndef MOBILENET_H -#define MOBILENET_H +#pragma once #include #include "general.h" @@ -22,5 +21,3 @@ struct VISION_API MobileNetV2Impl : torch::nn::Module { TORCH_MODULE(MobileNetV2); } // namespace models } // namespace vision - -#endif // MOBILENET_H diff --git a/torchvision/csrc/models/models.h b/torchvision/csrc/models/models.h index 1d47f2e3dd6..8376ed12020 100644 --- a/torchvision/csrc/models/models.h +++ b/torchvision/csrc/models/models.h @@ -1,5 +1,4 @@ -#ifndef MODELS_H -#define MODELS_H +#pragma once #include "alexnet.h" #include "densenet.h" @@ -11,5 +10,3 @@ #include "shufflenetv2.h" #include "squeezenet.h" #include "vgg.h" - -#endif // MODELS_H diff --git a/torchvision/csrc/models/modelsimpl.h b/torchvision/csrc/models/modelsimpl.h index 1dc8d06b15e..8f7663b32ad 100644 --- a/torchvision/csrc/models/modelsimpl.h +++ b/torchvision/csrc/models/modelsimpl.h @@ -1,5 +1,4 @@ -#ifndef MODELSIMPL_H -#define MODELSIMPL_H +#pragma once #include @@ -42,5 +41,3 @@ inline bool double_compare(double a, double b) { } // namespace modelsimpl } // namespace models } // namespace vision - -#endif // MODELSIMPL_H diff --git a/torchvision/csrc/models/resnet.h b/torchvision/csrc/models/resnet.h index e17dfe49a1a..ee4a8cd5284 100644 --- a/torchvision/csrc/models/resnet.h +++ b/torchvision/csrc/models/resnet.h @@ -1,5 +1,4 @@ -#ifndef RESNET_H -#define RESNET_H +#pragma once #include #include "general.h" @@ -256,5 +255,3 @@ TORCH_MODULE(WideResNet101_2); } // namespace models } // namespace vision - -#endif // RESNET_H diff --git a/torchvision/csrc/models/shufflenetv2.h b/torchvision/csrc/models/shufflenetv2.h index 96d53d554ab..8aa0ea69db4 100644 --- a/torchvision/csrc/models/shufflenetv2.h +++ b/torchvision/csrc/models/shufflenetv2.h @@ -1,5 +1,4 @@ -#ifndef SHUFFLENETV2_H -#define SHUFFLENETV2_H +#pragma once #include #include "general.h" @@ -44,5 +43,3 @@ TORCH_MODULE(ShuffleNetV2_x2_0); } // namespace models } // namespace vision - -#endif // SHUFFLENETV2_H diff --git a/torchvision/csrc/models/squeezenet.h b/torchvision/csrc/models/squeezenet.h index 6f55387de56..5cd3cb482e7 100644 --- a/torchvision/csrc/models/squeezenet.h +++ 
b/torchvision/csrc/models/squeezenet.h @@ -1,5 +1,4 @@ -#ifndef SQUEEZENET_H -#define SQUEEZENET_H +#pragma once #include #include "general.h" @@ -36,5 +35,3 @@ TORCH_MODULE(SqueezeNet1_1); } // namespace models } // namespace vision - -#endif // SQUEEZENET_H diff --git a/torchvision/csrc/models/vgg.h b/torchvision/csrc/models/vgg.h index b5c600a68ab..ea64dbfa151 100644 --- a/torchvision/csrc/models/vgg.h +++ b/torchvision/csrc/models/vgg.h @@ -1,5 +1,4 @@ -#ifndef VGG_H -#define VGG_H +#pragma once #include #include "general.h" @@ -89,5 +88,3 @@ TORCH_MODULE(VGG19BN); } // namespace models } // namespace vision - -#endif // VGG_H diff --git a/torchvision/csrc/vision.h b/torchvision/csrc/vision.h index 50bebab1fb1..c99b25c030d 100644 --- a/torchvision/csrc/vision.h +++ b/torchvision/csrc/vision.h @@ -1,5 +1,4 @@ -#ifndef VISION_H -#define VISION_H +#pragma once #include #include @@ -15,5 +14,3 @@ namespace detail { VISION_INLINE_VARIABLE int64_t _cuda_version = cuda_version(); } // namespace detail } // namespace vision - -#endif // VISION_H diff --git a/travis-scripts/run-clang-format/run-clang-format.py b/travis-scripts/run-clang-format/run-clang-format.py index 54e193db45b..fd2913bd70e 100755 --- a/travis-scripts/run-clang-format/run-clang-format.py +++ b/travis-scripts/run-clang-format/run-clang-format.py @@ -28,7 +28,7 @@ DEVNULL = open(os.devnull, "wb") -DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx' +DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu' class ExitStatus: From 3592c6a11da0bf4d4fc3a05ef85321237544d124 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 2 Dec 2020 16:32:54 +0000 Subject: [PATCH 9/9] Adding operator methods in vision::ops namespace. (#3096) * Adding operator methods in vision::ops namespace. * Replace general.h with macros.h * Adding vision.h to the necessary cpp files. 
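Note on the pattern applied below: every operator file gets the same treatment — the public entry points are wrapped in the vision::ops namespace, while file-local helpers stay inside an anonymous namespace so they keep internal linkage and cannot collide between translation units. The following sketch only illustrates that layout; the operator name toy_op, its signature, and its body are hypothetical and do not appear anywhere in the patch.

// toy_op.h -- hypothetical header following the layout introduced in this patch
#pragma once

#include <cstdint>

namespace vision {
namespace ops {

// Public declaration, reachable from other translation units as
// vision::ops::toy_op.
int64_t toy_op(int64_t x);

} // namespace ops
} // namespace vision

// toy_op.cpp -- hypothetical implementation file for the same operator
#include "toy_op.h"

namespace vision {
namespace ops {

namespace {

// Anonymous namespace: internal linkage, so an identically named helper
// in another kernel file cannot clash at link time.
int64_t toy_op_impl(int64_t x) {
  return x * 2;
}

} // namespace

int64_t toy_op(int64_t x) {
  return toy_op_impl(x);
}

} // namespace ops
} // namespace vision

Callers outside the namespace then need the qualified name, which is exactly the adjustment made to test_frcnn_tracing.cpp in the first hunk below (&vision::ops::nms_cpu).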
--- test/tracing/frcnn/test_frcnn_tracing.cpp | 2 +- torchvision/csrc/cpu/deform_conv2d_kernel.cpp | 6 ++++++ torchvision/csrc/cpu/deform_conv2d_kernel.h | 6 ++++++ torchvision/csrc/cpu/nms_kernel.cpp | 6 ++++++ torchvision/csrc/cpu/nms_kernel.h | 6 ++++++ torchvision/csrc/cpu/ps_roi_align_kernel.cpp | 6 ++++++ torchvision/csrc/cpu/ps_roi_align_kernel.h | 6 ++++++ torchvision/csrc/cpu/ps_roi_pool_kernel.cpp | 6 ++++++ torchvision/csrc/cpu/ps_roi_pool_kernel.h | 6 ++++++ torchvision/csrc/cpu/roi_align_kernel.cpp | 6 ++++++ torchvision/csrc/cpu/roi_align_kernel.h | 6 ++++++ torchvision/csrc/cpu/roi_pool_kernel.cpp | 6 ++++++ torchvision/csrc/cpu/roi_pool_kernel.h | 6 ++++++ torchvision/csrc/cuda/cuda_helpers.h | 6 ++++++ torchvision/csrc/cuda/deform_conv2d_kernel.cu | 6 ++++++ torchvision/csrc/cuda/deform_conv2d_kernel.h | 6 ++++++ torchvision/csrc/cuda/nms_kernel.cu | 6 ++++++ torchvision/csrc/cuda/nms_kernel.h | 6 ++++++ torchvision/csrc/cuda/ps_roi_align_kernel.cu | 6 ++++++ torchvision/csrc/cuda/ps_roi_align_kernel.h | 6 ++++++ torchvision/csrc/cuda/ps_roi_pool_kernel.cu | 6 ++++++ torchvision/csrc/cuda/ps_roi_pool_kernel.h | 6 ++++++ torchvision/csrc/cuda/roi_align_kernel.cu | 6 ++++++ torchvision/csrc/cuda/roi_align_kernel.h | 6 ++++++ torchvision/csrc/cuda/roi_pool_kernel.cu | 6 ++++++ torchvision/csrc/cuda/roi_pool_kernel.h | 6 ++++++ torchvision/csrc/deform_conv2d.cpp | 6 ++++++ torchvision/csrc/deform_conv2d.h | 6 ++++++ torchvision/csrc/models/alexnet.h | 2 +- torchvision/csrc/models/densenet.h | 2 +- torchvision/csrc/models/general.h | 11 ----------- torchvision/csrc/models/googlenet.h | 2 +- torchvision/csrc/models/inception.h | 2 +- torchvision/csrc/models/mnasnet.h | 2 +- torchvision/csrc/models/mobilenet.h | 2 +- torchvision/csrc/models/resnet.h | 2 +- torchvision/csrc/models/shufflenetv2.h | 2 +- torchvision/csrc/models/squeezenet.h | 2 +- torchvision/csrc/models/vgg.h | 2 +- torchvision/csrc/new_empty_tensor_op.cpp | 6 ++++++ torchvision/csrc/new_empty_tensor_op.h | 6 ++++++ torchvision/csrc/nms.cpp | 6 ++++++ torchvision/csrc/nms.h | 6 ++++++ torchvision/csrc/ps_roi_align.cpp | 6 ++++++ torchvision/csrc/ps_roi_align.h | 6 ++++++ torchvision/csrc/ps_roi_pool.cpp | 6 ++++++ torchvision/csrc/ps_roi_pool.h | 6 ++++++ torchvision/csrc/roi_align.cpp | 6 ++++++ torchvision/csrc/roi_align.h | 6 ++++++ torchvision/csrc/roi_pool.cpp | 6 ++++++ torchvision/csrc/roi_pool.h | 6 ++++++ torchvision/csrc/vision.cpp | 4 ++++ torchvision/csrc/vision.h | 1 - 53 files changed, 249 insertions(+), 23 deletions(-) delete mode 100644 torchvision/csrc/models/general.h diff --git a/test/tracing/frcnn/test_frcnn_tracing.cpp b/test/tracing/frcnn/test_frcnn_tracing.cpp index 90476b24f4b..7b10aee3c89 100644 --- a/test/tracing/frcnn/test_frcnn_tracing.cpp +++ b/test/tracing/frcnn/test_frcnn_tracing.cpp @@ -7,7 +7,7 @@ #ifdef _WIN32 // Windows only // This is necessary until operators are automatically registered on include -static auto _nms = &nms_cpu; +static auto _nms = &vision::ops::nms_cpu; #endif int main() { diff --git a/torchvision/csrc/cpu/deform_conv2d_kernel.cpp b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp index 5cac99db04a..4ae2d0a02db 100644 --- a/torchvision/csrc/cpu/deform_conv2d_kernel.cpp +++ b/torchvision/csrc/cpu/deform_conv2d_kernel.cpp @@ -68,6 +68,9 @@ #include "deform_conv2d_kernel.h" +namespace vision { +namespace ops { + namespace { const int kMaxParallelImgs = 32; @@ -1137,3 +1140,6 @@ deform_conv2d_backward_cpu( return std::make_tuple( grad_input, grad_weight, grad_offset, 
grad_mask, grad_bias); } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/deform_conv2d_kernel.h b/torchvision/csrc/cpu/deform_conv2d_kernel.h index 2eb5ab37c6e..2a49bad8304 100644 --- a/torchvision/csrc/cpu/deform_conv2d_kernel.h +++ b/torchvision/csrc/cpu/deform_conv2d_kernel.h @@ -3,6 +3,9 @@ #include #include "../macros.h" +namespace vision { +namespace ops { + VISION_API at::Tensor deform_conv2d_forward_cpu( const at::Tensor& input, const at::Tensor& weight, @@ -37,3 +40,6 @@ VISION_API std:: int64_t n_weight_grps, int64_t n_offset_grps, bool use_mask); + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/nms_kernel.cpp b/torchvision/csrc/cpu/nms_kernel.cpp index 52953a9e822..a77a6906870 100644 --- a/torchvision/csrc/cpu/nms_kernel.cpp +++ b/torchvision/csrc/cpu/nms_kernel.cpp @@ -1,5 +1,8 @@ #include "nms_kernel.h" +namespace vision { +namespace ops { + namespace { template @@ -103,3 +106,6 @@ at::Tensor nms_cpu( }); return result; } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/nms_kernel.h b/torchvision/csrc/cpu/nms_kernel.h index 7b6ef442626..1fdcaf3d3f9 100644 --- a/torchvision/csrc/cpu/nms_kernel.h +++ b/torchvision/csrc/cpu/nms_kernel.h @@ -3,7 +3,13 @@ #include #include "../macros.h" +namespace vision { +namespace ops { + VISION_API at::Tensor nms_cpu( const at::Tensor& dets, const at::Tensor& scores, double iou_threshold); + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/ps_roi_align_kernel.cpp b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp index 3d6c95f02ea..5abe4a41477 100644 --- a/torchvision/csrc/cpu/ps_roi_align_kernel.cpp +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.cpp @@ -1,5 +1,8 @@ #include "ps_roi_align_kernel.h" +namespace vision { +namespace ops { + namespace { template @@ -416,3 +419,6 @@ at::Tensor ps_roi_align_backward_cpu( }); return grad_input; } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/ps_roi_align_kernel.h b/torchvision/csrc/cpu/ps_roi_align_kernel.h index 86a3f9a8876..a4bea77853b 100644 --- a/torchvision/csrc/cpu/ps_roi_align_kernel.h +++ b/torchvision/csrc/cpu/ps_roi_align_kernel.h @@ -3,6 +3,9 @@ #include #include "../macros.h" +namespace vision { +namespace ops { + VISION_API std::tuple ps_roi_align_forward_cpu( const at::Tensor& input, const at::Tensor& rois, @@ -23,3 +26,6 @@ VISION_API at::Tensor ps_roi_align_backward_cpu( int64_t channels, int64_t height, int64_t width); + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp index cdee9b9f55c..425b4c68f1a 100644 --- a/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp +++ b/torchvision/csrc/cpu/ps_roi_pool_kernel.cpp @@ -1,5 +1,8 @@ #include "ps_roi_pool_kernel.h" +namespace vision { +namespace ops { + namespace { template @@ -255,3 +258,6 @@ at::Tensor ps_roi_pool_backward_cpu( }); return grad_input; } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/ps_roi_pool_kernel.h b/torchvision/csrc/cpu/ps_roi_pool_kernel.h index 14a4e22681a..2cefe39e11e 100644 --- a/torchvision/csrc/cpu/ps_roi_pool_kernel.h +++ b/torchvision/csrc/cpu/ps_roi_pool_kernel.h @@ -3,6 +3,9 @@ #include #include "../macros.h" +namespace vision { +namespace ops { + VISION_API std::tuple ps_roi_pool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, @@ -21,3 +24,6 @@ VISION_API at::Tensor ps_roi_pool_backward_cpu( int64_t channels, int64_t height, 
int64_t width); + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/roi_align_kernel.cpp b/torchvision/csrc/cpu/roi_align_kernel.cpp index 133722fdc5e..cbb75f2c474 100644 --- a/torchvision/csrc/cpu/roi_align_kernel.cpp +++ b/torchvision/csrc/cpu/roi_align_kernel.cpp @@ -1,5 +1,8 @@ #include "roi_align_kernel.h" +namespace vision { +namespace ops { + namespace { // implementation taken from Caffe2 @@ -494,3 +497,6 @@ at::Tensor roi_align_backward_cpu( }); return grad_input; } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/roi_align_kernel.h b/torchvision/csrc/cpu/roi_align_kernel.h index 79fd46bd44e..2e7813c261c 100644 --- a/torchvision/csrc/cpu/roi_align_kernel.h +++ b/torchvision/csrc/cpu/roi_align_kernel.h @@ -3,6 +3,9 @@ #include #include "../macros.h" +namespace vision { +namespace ops { + VISION_API at::Tensor roi_align_forward_cpu( const at::Tensor& input, const at::Tensor& rois, @@ -24,3 +27,6 @@ VISION_API at::Tensor roi_align_backward_cpu( int64_t width, int64_t sampling_ratio, bool aligned); + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/roi_pool_kernel.cpp b/torchvision/csrc/cpu/roi_pool_kernel.cpp index d622f2b430b..375b722684e 100644 --- a/torchvision/csrc/cpu/roi_pool_kernel.cpp +++ b/torchvision/csrc/cpu/roi_pool_kernel.cpp @@ -2,6 +2,9 @@ #include "roi_pool_kernel.h" +namespace vision { +namespace ops { + namespace { template @@ -231,3 +234,6 @@ at::Tensor roi_pool_backward_cpu( }); return grad_input; } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cpu/roi_pool_kernel.h b/torchvision/csrc/cpu/roi_pool_kernel.h index 66fd993d5b4..33d029cf31a 100644 --- a/torchvision/csrc/cpu/roi_pool_kernel.h +++ b/torchvision/csrc/cpu/roi_pool_kernel.h @@ -3,6 +3,9 @@ #include #include "../macros.h" +namespace vision { +namespace ops { + VISION_API std::tuple roi_pool_forward_cpu( const at::Tensor& input, const at::Tensor& rois, @@ -21,3 +24,6 @@ VISION_API at::Tensor roi_pool_backward_cpu( int64_t channels, int64_t height, int64_t width); + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cuda/cuda_helpers.h b/torchvision/csrc/cuda/cuda_helpers.h index a82e1c86f87..cec4a183899 100644 --- a/torchvision/csrc/cuda/cuda_helpers.h +++ b/torchvision/csrc/cuda/cuda_helpers.h @@ -1,5 +1,8 @@ #pragma once +namespace vision { +namespace ops { + #define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \ i += (blockDim.x * gridDim.x)) @@ -8,3 +11,6 @@ template constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) { return (n + m - 1) / m; } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cuda/deform_conv2d_kernel.cu b/torchvision/csrc/cuda/deform_conv2d_kernel.cu index cef8124caf3..e530710863c 100644 --- a/torchvision/csrc/cuda/deform_conv2d_kernel.cu +++ b/torchvision/csrc/cuda/deform_conv2d_kernel.cu @@ -73,6 +73,9 @@ #include "cuda_helpers.h" #include "deform_conv2d_kernel.h" +namespace vision { +namespace ops { + namespace { const int kMaxParallelImgs = 32; @@ -1183,3 +1186,6 @@ deform_conv2d_backward_cuda( return std::make_tuple( grad_input, grad_weight, grad_offset, grad_mask, grad_bias); } + +} // namespace ops +} // namespace vision diff --git a/torchvision/csrc/cuda/deform_conv2d_kernel.h b/torchvision/csrc/cuda/deform_conv2d_kernel.h index 00f3f3dc15d..b2e3dc3f17f 100644 --- a/torchvision/csrc/cuda/deform_conv2d_kernel.h +++ 
b/torchvision/csrc/cuda/deform_conv2d_kernel.h
@@ -3,6 +3,9 @@
 #include 
 #include "../macros.h"
 
+namespace vision {
+namespace ops {
+
 VISION_API at::Tensor deform_conv2d_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& weight,
@@ -37,3 +40,6 @@ VISION_API std::
     int64_t n_weight_grps,
     int64_t n_offset_grps,
     bool use_mask);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/nms_kernel.cu b/torchvision/csrc/cuda/nms_kernel.cu
index ae244efebe7..b8d4b3ce0ec 100644
--- a/torchvision/csrc/cuda/nms_kernel.cu
+++ b/torchvision/csrc/cuda/nms_kernel.cu
@@ -4,6 +4,9 @@
 #include "cuda_helpers.h"
 #include "nms_kernel.h"
 
+namespace vision {
+namespace ops {
+
 namespace {
 
 int const threadsPerBlock = sizeof(unsigned long long) * 8;
@@ -162,3 +165,6 @@ at::Tensor nms_cuda(
       {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep)
            .to(order_t.device(), keep.scalar_type())});
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/nms_kernel.h b/torchvision/csrc/cuda/nms_kernel.h
index 1eceddaccf3..0d2c0838437 100644
--- a/torchvision/csrc/cuda/nms_kernel.h
+++ b/torchvision/csrc/cuda/nms_kernel.h
@@ -3,7 +3,13 @@
 #include 
 #include "../macros.h"
 
+namespace vision {
+namespace ops {
+
 VISION_API at::Tensor nms_cuda(
     const at::Tensor& dets,
     const at::Tensor& scores,
     double iou_threshold);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/ps_roi_align_kernel.cu b/torchvision/csrc/cuda/ps_roi_align_kernel.cu
index 7c808580258..6b1e729b12d 100644
--- a/torchvision/csrc/cuda/ps_roi_align_kernel.cu
+++ b/torchvision/csrc/cuda/ps_roi_align_kernel.cu
@@ -5,6 +5,9 @@
 #include "cuda_helpers.h"
 #include "ps_roi_align_kernel.h"
 
+namespace vision {
+namespace ops {
+
 namespace {
 
 template 
@@ -434,3 +437,6 @@ at::Tensor ps_roi_align_backward_cuda(
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/ps_roi_align_kernel.h b/torchvision/csrc/cuda/ps_roi_align_kernel.h
index 45a300d6711..c40e6fa55b1 100644
--- a/torchvision/csrc/cuda/ps_roi_align_kernel.h
+++ b/torchvision/csrc/cuda/ps_roi_align_kernel.h
@@ -3,6 +3,9 @@
 #include 
 #include "../macros.h"
 
+namespace vision {
+namespace ops {
+
 VISION_API std::tuple ps_roi_align_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -23,3 +26,6 @@ VISION_API at::Tensor ps_roi_align_backward_cuda(
     int64_t channels,
     int64_t height,
     int64_t width);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/ps_roi_pool_kernel.cu b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu
index ed0ed26484d..91fd25b4bb5 100644
--- a/torchvision/csrc/cuda/ps_roi_pool_kernel.cu
+++ b/torchvision/csrc/cuda/ps_roi_pool_kernel.cu
@@ -5,6 +5,9 @@
 #include "cuda_helpers.h"
 #include "ps_roi_pool_kernel.h"
 
+namespace vision {
+namespace ops {
+
 namespace {
 
 template 
@@ -270,3 +273,6 @@ at::Tensor ps_roi_pool_backward_cuda(
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/ps_roi_pool_kernel.h b/torchvision/csrc/cuda/ps_roi_pool_kernel.h
index e97f0ee7065..21015d4693b 100644
--- a/torchvision/csrc/cuda/ps_roi_pool_kernel.h
+++ b/torchvision/csrc/cuda/ps_roi_pool_kernel.h
@@ -3,6 +3,9 @@
 #include 
 #include "../macros.h"
 
+namespace vision {
+namespace ops {
+
 VISION_API std::tuple ps_roi_pool_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -21,3 +24,6 @@ VISION_API at::Tensor ps_roi_pool_backward_cuda(
     int64_t channels,
     int64_t height,
     int64_t width);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/roi_align_kernel.cu b/torchvision/csrc/cuda/roi_align_kernel.cu
index 195d8b067f4..59388faa6ad 100644
--- a/torchvision/csrc/cuda/roi_align_kernel.cu
+++ b/torchvision/csrc/cuda/roi_align_kernel.cu
@@ -5,6 +5,9 @@
 #include "cuda_helpers.h"
 #include "roi_align_kernel.h"
 
+namespace vision {
+namespace ops {
+
 namespace {
 
 template 
@@ -443,3 +446,6 @@ at::Tensor roi_align_backward_cuda(
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/roi_align_kernel.h b/torchvision/csrc/cuda/roi_align_kernel.h
index 46054f04f38..71096201627 100644
--- a/torchvision/csrc/cuda/roi_align_kernel.h
+++ b/torchvision/csrc/cuda/roi_align_kernel.h
@@ -3,6 +3,9 @@
 #include 
 #include "../macros.h"
 
+namespace vision {
+namespace ops {
+
 VISION_API at::Tensor roi_align_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -24,3 +27,6 @@ VISION_API at::Tensor roi_align_backward_cuda(
     int64_t width,
     int64_t sampling_ratio,
     bool aligned);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/roi_pool_kernel.cu b/torchvision/csrc/cuda/roi_pool_kernel.cu
index 782ecaf9eb3..a96e79c87a9 100644
--- a/torchvision/csrc/cuda/roi_pool_kernel.cu
+++ b/torchvision/csrc/cuda/roi_pool_kernel.cu
@@ -6,6 +6,9 @@
 #include "cuda_helpers.h"
 #include "roi_pool_kernel.h"
 
+namespace vision {
+namespace ops {
+
 namespace {
 
 template 
@@ -254,3 +257,6 @@ at::Tensor roi_pool_backward_cuda(
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/cuda/roi_pool_kernel.h b/torchvision/csrc/cuda/roi_pool_kernel.h
index 3a99f7521bd..71a649968db 100644
--- a/torchvision/csrc/cuda/roi_pool_kernel.h
+++ b/torchvision/csrc/cuda/roi_pool_kernel.h
@@ -3,6 +3,9 @@
 #include 
 #include "../macros.h"
 
+namespace vision {
+namespace ops {
+
 VISION_API std::tuple roi_pool_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -21,3 +24,6 @@ VISION_API at::Tensor roi_pool_backward_cuda(
     int64_t channels,
     int64_t height,
     int64_t width);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/deform_conv2d.cpp b/torchvision/csrc/deform_conv2d.cpp
index 74ba630537a..e8a416683f2 100644
--- a/torchvision/csrc/deform_conv2d.cpp
+++ b/torchvision/csrc/deform_conv2d.cpp
@@ -5,6 +5,9 @@
 #include 
 #endif
 
+namespace vision {
+namespace ops {
+
 at::Tensor deform_conv2d(
     const at::Tensor& input,
     const at::Tensor& weight,
@@ -361,3 +364,6 @@ deform_conv2d_backward_autograd(
   return std::make_tuple(
       result[0], result[1], result[2], result[3], result[4]);
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/deform_conv2d.h b/torchvision/csrc/deform_conv2d.h
index 6adc77fb888..85675ee6bf2 100644
--- a/torchvision/csrc/deform_conv2d.h
+++ b/torchvision/csrc/deform_conv2d.h
@@ -9,6 +9,9 @@
 #include "hip/deform_conv2d_kernel.h"
 #endif
 
+namespace vision {
+namespace ops {
+
 // C++ Forward
 at::Tensor deform_conv2d(
     const at::Tensor& input,
@@ -98,3 +101,6 @@ deform_conv2d_backward_autograd(
     int64_t groups,
     int64_t offset_groups,
     bool use_mask);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/models/alexnet.h b/torchvision/csrc/models/alexnet.h
index 33ffe379a97..d2529c88882 100644
--- a/torchvision/csrc/models/alexnet.h
+++ b/torchvision/csrc/models/alexnet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/densenet.h b/torchvision/csrc/models/densenet.h
index 22db45b719d..9358631e4ba 100644
--- a/torchvision/csrc/models/densenet.h
+++ b/torchvision/csrc/models/densenet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/general.h b/torchvision/csrc/models/general.h
deleted file mode 100644
index 4463786b4bb..00000000000
--- a/torchvision/csrc/models/general.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-#ifdef _WIN32
-#if defined(torchvision_EXPORTS)
-#define VISION_API __declspec(dllexport)
-#else
-#define VISION_API __declspec(dllimport)
-#endif
-#else
-#define VISION_API
-#endif
diff --git a/torchvision/csrc/models/googlenet.h b/torchvision/csrc/models/googlenet.h
index d5192c7623f..3d4b3faf7e4 100644
--- a/torchvision/csrc/models/googlenet.h
+++ b/torchvision/csrc/models/googlenet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/inception.h b/torchvision/csrc/models/inception.h
index 3f964e3103c..08f329fbc1b 100644
--- a/torchvision/csrc/models/inception.h
+++ b/torchvision/csrc/models/inception.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/mnasnet.h b/torchvision/csrc/models/mnasnet.h
index ae136cd5b30..e69559b2dd7 100644
--- a/torchvision/csrc/models/mnasnet.h
+++ b/torchvision/csrc/models/mnasnet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/mobilenet.h b/torchvision/csrc/models/mobilenet.h
index 7e3f8596692..63d26474b23 100644
--- a/torchvision/csrc/models/mobilenet.h
+++ b/torchvision/csrc/models/mobilenet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/resnet.h b/torchvision/csrc/models/resnet.h
index ee4a8cd5284..b8caf4332c6 100644
--- a/torchvision/csrc/models/resnet.h
+++ b/torchvision/csrc/models/resnet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/shufflenetv2.h b/torchvision/csrc/models/shufflenetv2.h
index 8aa0ea69db4..ea8524c72fa 100644
--- a/torchvision/csrc/models/shufflenetv2.h
+++ b/torchvision/csrc/models/shufflenetv2.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/squeezenet.h b/torchvision/csrc/models/squeezenet.h
index 5cd3cb482e7..c43d2b07f13 100644
--- a/torchvision/csrc/models/squeezenet.h
+++ b/torchvision/csrc/models/squeezenet.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/models/vgg.h b/torchvision/csrc/models/vgg.h
index ea64dbfa151..4a540bea822 100644
--- a/torchvision/csrc/models/vgg.h
+++ b/torchvision/csrc/models/vgg.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include 
-#include "general.h"
+#include "../macros.h"
 
 namespace vision {
 namespace models {
diff --git a/torchvision/csrc/new_empty_tensor_op.cpp b/torchvision/csrc/new_empty_tensor_op.cpp
index e4f31600c54..768b5599bf9 100644
--- a/torchvision/csrc/new_empty_tensor_op.cpp
+++ b/torchvision/csrc/new_empty_tensor_op.cpp
@@ -3,6 +3,9 @@
 #include "new_empty_tensor_op.h"
 #include 
 
+namespace vision {
+namespace ops {
+
 namespace {
 
 class NewEmptyTensorOp : public torch::autograd::Function {
@@ -33,3 +36,6 @@ at::Tensor new_empty_tensor(
     const c10::List& shape) {
   return NewEmptyTensorOp::apply(input, shape)[0];
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/new_empty_tensor_op.h b/torchvision/csrc/new_empty_tensor_op.h
index 75f4cd5a7fe..f00cb67b779 100644
--- a/torchvision/csrc/new_empty_tensor_op.h
+++ b/torchvision/csrc/new_empty_tensor_op.h
@@ -2,6 +2,12 @@
 
 #include 
 
+namespace vision {
+namespace ops {
+
 at::Tensor new_empty_tensor(
     const at::Tensor& input,
     const c10::List& shape);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/nms.cpp b/torchvision/csrc/nms.cpp
index 075f3101937..2f9dbee9a32 100644
--- a/torchvision/csrc/nms.cpp
+++ b/torchvision/csrc/nms.cpp
@@ -5,6 +5,9 @@
 #include 
 #endif
 
+namespace vision {
+namespace ops {
+
 at::Tensor nms(
     const at::Tensor& dets,
     const at::Tensor& scores,
@@ -27,3 +30,6 @@ at::Tensor nms_autocast(
       iou_threshold);
 }
 #endif
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h
index 87b07548454..ac7cbc53caf 100644
--- a/torchvision/csrc/nms.h
+++ b/torchvision/csrc/nms.h
@@ -9,6 +9,9 @@
 #include "hip/nms_kernel.h"
 #endif
 
+namespace vision {
+namespace ops {
+
 // C++ Forward
 at::Tensor nms(
     const at::Tensor& dets,
@@ -22,3 +25,6 @@ at::Tensor nms_autocast(
     const at::Tensor& scores,
     double iou_threshold);
 #endif
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/ps_roi_align.cpp b/torchvision/csrc/ps_roi_align.cpp
index 0e1a30d6e63..5add21aaeec 100644
--- a/torchvision/csrc/ps_roi_align.cpp
+++ b/torchvision/csrc/ps_roi_align.cpp
@@ -5,6 +5,9 @@
 #include 
 #endif
 
+namespace vision {
+namespace ops {
+
 std::tuple ps_roi_align(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -218,3 +221,6 @@ at::Tensor ps_roi_align_backward_autograd(
       height,
       width)[0];
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/ps_roi_align.h b/torchvision/csrc/ps_roi_align.h
index 0f7ecea2f12..c21107df4f4 100644
--- a/torchvision/csrc/ps_roi_align.h
+++ b/torchvision/csrc/ps_roi_align.h
@@ -9,6 +9,9 @@
 #include "hip/ps_roi_align_kernel.h"
 #endif
 
+namespace vision {
+namespace ops {
+
 // C++ Forward
 std::tuple ps_roi_align(
     const at::Tensor& input,
@@ -64,3 +67,6 @@ at::Tensor ps_roi_align_backward_autograd(
     int64_t channels,
     int64_t height,
     int64_t width);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/ps_roi_pool.cpp b/torchvision/csrc/ps_roi_pool.cpp
index 76fb2d04be7..88a733a6369 100644
--- a/torchvision/csrc/ps_roi_pool.cpp
+++ b/torchvision/csrc/ps_roi_pool.cpp
@@ -5,6 +5,9 @@
 #include 
 #endif
 
+namespace vision {
+namespace ops {
+
 std::tuple ps_roi_pool(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -197,3 +200,6 @@ at::Tensor ps_roi_pool_backward_autograd(
       height,
       width)[0];
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/ps_roi_pool.h b/torchvision/csrc/ps_roi_pool.h
index 0c8baef4a9a..20ae17d3ad1 100644
--- a/torchvision/csrc/ps_roi_pool.h
+++ b/torchvision/csrc/ps_roi_pool.h
@@ -9,6 +9,9 @@
 #include "hip/ps_roi_pool_kernel.h"
 #endif
 
+namespace vision {
+namespace ops {
+
 // C++ Forward
 std::tuple ps_roi_pool(
     const at::Tensor& input,
@@ -59,3 +62,6 @@ at::Tensor ps_roi_pool_backward_autograd(
     int64_t channels,
     int64_t height,
     int64_t width);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/roi_align.cpp b/torchvision/csrc/roi_align.cpp
index 30eda8612d2..63643a6cb46 100644
--- a/torchvision/csrc/roi_align.cpp
+++ b/torchvision/csrc/roi_align.cpp
@@ -5,6 +5,9 @@
 #include 
 #endif
 
+namespace vision {
+namespace ops {
+
 at::Tensor roi_align(
     const at::Tensor& input, // Input feature map.
     const at::Tensor& rois, // List of ROIs to pool over.
@@ -227,3 +230,6 @@ at::Tensor roi_align_backward_autograd(
       sampling_ratio,
      aligned)[0];
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/roi_align.h b/torchvision/csrc/roi_align.h
index d9bae4ba2a1..1e92c8d2134 100644
--- a/torchvision/csrc/roi_align.h
+++ b/torchvision/csrc/roi_align.h
@@ -9,6 +9,9 @@
 #include "hip/roi_align_kernel.h"
 #endif
 
+namespace vision {
+namespace ops {
+
 // C++ Forward
 at::Tensor roi_align(
     const at::Tensor& input,
@@ -67,3 +70,6 @@ at::Tensor roi_align_backward_autograd(
     int64_t width,
     int64_t sampling_ratio,
     bool aligned);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/roi_pool.cpp b/torchvision/csrc/roi_pool.cpp
index c8d70bd8940..b2948e6dd23 100644
--- a/torchvision/csrc/roi_pool.cpp
+++ b/torchvision/csrc/roi_pool.cpp
@@ -5,6 +5,9 @@
 #include 
 #endif
 
+namespace vision {
+namespace ops {
+
 std::tuple roi_pool(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -196,3 +199,6 @@ at::Tensor roi_pool_backward_autograd(
       height,
       width)[0];
 }
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/roi_pool.h b/torchvision/csrc/roi_pool.h
index f528ce6d7e0..ac005914107 100644
--- a/torchvision/csrc/roi_pool.h
+++ b/torchvision/csrc/roi_pool.h
@@ -9,6 +9,9 @@
 #include "hip/roi_pool_kernel.h"
 #endif
 
+namespace vision {
+namespace ops {
+
 // C++ Forward
 std::tuple roi_pool(
     const at::Tensor& input,
@@ -59,3 +62,6 @@ at::Tensor roi_pool_backward_autograd(
     int64_t channels,
     int64_t height,
     int64_t width);
+
+} // namespace ops
+} // namespace vision
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
index fb0bf014912..b24b6c3df45 100644
--- a/torchvision/csrc/vision.cpp
+++ b/torchvision/csrc/vision.cpp
@@ -1,3 +1,5 @@
+#include "vision.h"
+
 #include 
 #include 
 
@@ -35,6 +37,8 @@ int64_t cuda_version() noexcept {
 }
 } // namespace vision
 
+using namespace vision::ops;
+
 TORCH_LIBRARY(torchvision, m) {
   m.def(
       "deform_conv2d(Tensor input, Tensor weight, Tensor offset, Tensor mask, Tensor bias, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int groups, int offset_groups, bool use_mask) -> Tensor");
diff --git a/torchvision/csrc/vision.h b/torchvision/csrc/vision.h
index c99b25c030d..91935be6fdd 100644
--- a/torchvision/csrc/vision.h
+++ b/torchvision/csrc/vision.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include 
 #include 
 
 #include "macros.h"