Skip to content

Encapsulate and standardize roi_pool #3088

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <TH/TH.h>
#include <algorithm>
#include <float.h>

#include "roi_pool_kernel.h"

namespace {

template <class T>
inline void add(T* address, const T& val) {
*address += val;
}

template <typename T>
void RoIPoolForward(
void roi_pool_forward_kernel_impl(
const T* input,
const T spatial_scale,
int channels,
Expand Down Expand Up @@ -78,7 +79,7 @@ void RoIPoolForward(
}

template <typename T>
void RoIPoolBackward(
void roi_pool_backward_kernel_impl(
const T* grad_output,
const int* argmax_data,
int num_rois,
Expand Down Expand Up @@ -120,7 +121,9 @@ void RoIPoolBackward(
} // num_rois
}

std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
} // namespace

std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
Expand All @@ -131,7 +134,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(

at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};

at::CheckedFrom c = "ROIPool_forward_cpu";
at::CheckedFrom c = "roi_pool_forward_cpu";
at::checkAllSameType(c, {input_t, rois_t});

int num_rois = rois.size(0);
Expand All @@ -151,8 +154,8 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(

auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ROIPool_forward", [&] {
RoIPoolForward<scalar_t>(
input.scalar_type(), "roi_pool_forward", [&] {
roi_pool_forward_kernel_impl<scalar_t>(
input_.data_ptr<scalar_t>(),
spatial_scale,
channels,
Expand All @@ -168,7 +171,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
return std::make_tuple(output, argmax);
}

at::Tensor ROIPool_backward_cpu(
at::Tensor roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
Expand All @@ -188,7 +191,7 @@ at::Tensor ROIPool_backward_cpu(

at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2};

at::CheckedFrom c = "ROIPool_backward_cpu";
at::CheckedFrom c = "roi_pool_backward_cpu";
at::checkAllSameType(c, {grad_t, rois_t});

auto num_rois = rois.size(0);
Expand All @@ -209,8 +212,8 @@ at::Tensor ROIPool_backward_cpu(

auto rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ROIPool_backward", [&] {
RoIPoolBackward<scalar_t>(
grad.scalar_type(), "roi_pool_backward", [&] {
roi_pool_backward_kernel_impl<scalar_t>(
grad.data_ptr<scalar_t>(),
argmax.data_ptr<int>(),
num_rois,
Expand Down
23 changes: 23 additions & 0 deletions torchvision/csrc/cpu/roi_pool_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Public CPU entry points for the roi_pool operator.
// Implementations live in the matching roi_pool kernel .cpp file.
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

// Forward pass of RoI pooling on CPU.
// Returns a (output, argmax) pair; the argmax tensor records which input
// element produced each pooled value and is consumed by the backward pass.
VISION_API std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

// Backward pass of RoI pooling on CPU: routes `grad` back to the input
// positions selected in `argmax` during the forward pass.
// batch_size/channels/height/width describe the shape of the gradient
// tensor to allocate (the original input's shape).
VISION_API at::Tensor roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
19 changes: 0 additions & 19 deletions torchvision/csrc/cpu/vision_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,3 @@
#include "../macros.h"

// TODO: Delete this file once all the methods are gone

VISION_API std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

VISION_API at::Tensor ROIPool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#include <float.h>
#include <THC/THCAtomics.cuh>

#include "cuda_helpers.h"
#include "roi_pool_kernel.h"

namespace {

template <typename T>
__global__ void RoIPoolForward(
__global__ void roi_pool_forward_kernel_impl(
int nthreads,
const T* input,
const T spatial_scale,
Expand Down Expand Up @@ -72,7 +74,7 @@ __global__ void RoIPoolForward(
}

template <typename T>
__global__ void RoIPoolBackward(
__global__ void roi_pool_backward_kernel_impl(
int nthreads,
const T* grad_output,
const int* argmax_data,
Expand Down Expand Up @@ -115,7 +117,9 @@ __global__ void RoIPoolBackward(
}
}

std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(
} // namespace

std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
Expand All @@ -128,7 +132,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(

at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};

at::CheckedFrom c = "ROIPool_forward_cuda";
at::CheckedFrom c = "roi_pool_forward_cuda";
at::checkAllSameGPU(c, {input_t, rois_t});
at::checkAllSameType(c, {input_t, rois_t});

Expand Down Expand Up @@ -160,8 +164,8 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(

auto input_ = input.contiguous(),
rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "ROIPool_forward", [&] {
RoIPoolForward<scalar_t><<<grid, block, 0, stream>>>(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "roi_pool_forward", [&] {
roi_pool_forward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
output_size,
input_.data_ptr<scalar_t>(),
spatial_scale,
Expand All @@ -178,7 +182,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(
return std::make_tuple(output, argmax);
}

at::Tensor ROIPool_backward_cuda(
at::Tensor roi_pool_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
Expand All @@ -197,7 +201,7 @@ at::Tensor ROIPool_backward_cuda(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
argmax_t{argmax, "argmax", 3};

at::CheckedFrom c = "ROIPool_backward_cuda";
at::CheckedFrom c = "roi_pool_backward_cuda";
at::checkAllSameGPU(c, {grad_t, rois_t, argmax_t});
at::checkAllSameType(c, {grad_t, rois_t});

Expand Down Expand Up @@ -228,8 +232,8 @@ at::Tensor ROIPool_backward_cuda(

auto argmax_ = argmax.contiguous(),
rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "ROIPool_backward", [&] {
RoIPoolBackward<scalar_t><<<grid, block, 0, stream>>>(
AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "roi_pool_backward", [&] {
roi_pool_backward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
grad.numel(),
grad.data_ptr<scalar_t>(),
argmax_.data_ptr<int>(),
Expand Down
23 changes: 23 additions & 0 deletions torchvision/csrc/cuda/roi_pool_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Public CUDA entry points for the roi_pool operator.
// Implementations live in the matching roi_pool kernel .cu file.
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

// Forward pass of RoI pooling on CUDA.
// Returns a (output, argmax) pair; the argmax tensor records which input
// element produced each pooled value and is consumed by the backward pass.
VISION_API std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

// Backward pass of RoI pooling on CUDA: routes `grad` back to the input
// positions selected in `argmax` during the forward pass.
// batch_size/channels/height/width describe the shape of the gradient
// tensor to allocate (the original input's shape).
VISION_API at::Tensor roi_pool_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
19 changes: 0 additions & 19 deletions torchvision/csrc/cuda/vision_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,3 @@
#include "../macros.h"

// TODO: Delete this file once all the methods are gone

VISION_API std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
const double spatial_scale,
const int64_t pooled_height,
const int64_t pooled_width);

VISION_API at::Tensor ROIPool_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
const double spatial_scale,
const int64_t pooled_height,
const int64_t pooled_width,
const int64_t batch_size,
const int64_t channels,
const int64_t height,
const int64_t width);
26 changes: 11 additions & 15 deletions torchvision/csrc/ROIPool.h → torchvision/csrc/roi_pool.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
#pragma once
#include "roi_pool.h"
#include <torch/extension.h>

#include "cpu/vision_cpu.h"

#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#endif

// TODO: put this stuff in torchvision namespace

std::tuple<at::Tensor, at::Tensor> roi_pool(
const at::Tensor& input,
const at::Tensor& rois,
Expand All @@ -26,7 +18,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool(
}

#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> ROIPool_autocast(
std::tuple<at::Tensor, at::Tensor> roi_pool_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
Expand Down Expand Up @@ -73,6 +65,8 @@ at::Tensor _roi_pool_backward(
width);
}

namespace {

class ROIPoolFunction : public torch::autograd::Function<ROIPoolFunction> {
public:
static torch::autograd::variable_list forward(
Expand Down Expand Up @@ -165,7 +159,9 @@ class ROIPoolBackwardFunction
}
};

std::tuple<at::Tensor, at::Tensor> ROIPool_autograd(
} // namespace

std::tuple<at::Tensor, at::Tensor> roi_pool_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
Expand All @@ -177,7 +173,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_autograd(
return std::make_tuple(result[0], result[1]);
}

at::Tensor ROIPool_backward_autograd(
at::Tensor roi_pool_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
Expand Down
61 changes: 61 additions & 0 deletions torchvision/csrc/roi_pool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Umbrella header for the roi_pool operator: pulls in the per-backend
// kernel declarations and declares the dispatcher-facing entry points
// (plain C++, autocast, and autograd variants).
#pragma once

#include "cpu/roi_pool_kernel.h"

#ifdef WITH_CUDA
#include "cuda/roi_pool_kernel.h"
#endif
#ifdef WITH_HIP
#include "hip/roi_pool_kernel.h"
#endif

// C++ Forward — device-agnostic entry point; returns (output, argmax).
std::tuple<at::Tensor, at::Tensor> roi_pool(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

// Autocast Forward — wrapper registered for mixed-precision dispatch;
// only compiled when a GPU backend is available.
#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> roi_pool_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
#endif

// C++ Backward — gradient entry point.
// NOTE(review): the leading-underscore name breaks the roi_pool_* naming
// convention used by every other declaration here; renaming would touch
// external callers, so it is flagged rather than changed.
at::Tensor _roi_pool_backward(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);

// Autograd Forward and Backward — variants that record/replay the
// autograd graph via the ROIPool autograd Functions.
std::tuple<at::Tensor, at::Tensor> roi_pool_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);

at::Tensor roi_pool_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
Loading