diff --git a/modules/rgbd/samples/large_kinfu_demo.cpp b/modules/rgbd/samples/large_kinfu_demo.cpp index 40d14ca8cae..e78c3511215 100644 --- a/modules/rgbd/samples/large_kinfu_demo.cpp +++ b/modules/rgbd/samples/large_kinfu_demo.cpp @@ -127,8 +127,7 @@ int main(int argc, char** argv) // These params can be different for each depth sensor ds->updateParams(*params); - // Disabled until there is no OpenCL accelerated HashTSDF is available - cv::setUseOptimized(false); + cv::setUseOptimized(true); if (!idle) largeKinfu = LargeKinfu::create(params); diff --git a/modules/rgbd/src/hash_tsdf.cpp b/modules/rgbd/src/hash_tsdf.cpp index afdeef37063..08338fe83ef 100644 --- a/modules/rgbd/src/hash_tsdf.cpp +++ b/modules/rgbd/src/hash_tsdf.cpp @@ -15,9 +15,10 @@ #include "opencv2/core/utility.hpp" #include "opencv2/core/utils/trace.hpp" #include "utils.hpp" +#include "opencl_kernels_rgbd.hpp" #define USE_INTERPOLATION_IN_GETNORMAL 1 -#define VOLUMES_SIZE 1024 +#define VOLUMES_SIZE 8192 namespace cv { @@ -35,13 +36,17 @@ HashTSDFVolume::HashTSDFVolume(float _voxelSize, cv::Matx44f _pose, float _rayca zFirstMemOrder(_zFirstMemOrder) { truncDist = std::max(_truncDist, 4.0f * voxelSize); -} -HashTSDFVolumeCPU::HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, - int _maxWeight, float _truncateThreshold, int _volumeUnitRes, bool _zFirstMemOrder) - :HashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold, _volumeUnitRes, - _zFirstMemOrder) -{ + if (!(volumeUnitResolution & (volumeUnitResolution - 1))) + { + // vuRes is a power of 2, let's get this power + volumeUnitDegree = trailingZeros32(volumeUnitResolution); + } + else + { + CV_Error(Error::StsBadArg, "Volume unit resolution should be a power of 2"); + } + int xdim, ydim, zdim; if (zFirstMemOrder) { @@ -56,21 +61,110 @@ HashTSDFVolumeCPU::HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, flo zdim = volumeUnitResolution * volumeUnitResolution; } volStrides = Vec4i(xdim, ydim, zdim); +} + +//! Spatial hashing +struct tsdf_hash +{ + size_t operator()(const Vec3i& x) const noexcept + { + size_t seed = 0; + constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9; + for (uint16_t i = 0; i < 3; i++) + { + seed ^= std::hash()(x[i]) + GOLDEN_RATIO + (seed << 6) + (seed >> 2); + } + return seed; + } +}; + +struct VolumeUnit +{ + cv::Vec3i coord; + int index; + cv::Matx44f pose; + int lastVisibleIndex = 0; + bool isActive; +}; + +typedef std::unordered_set VolumeUnitIndexSet; +typedef std::unordered_map VolumeUnitIndexes; + +class HashTSDFVolumeCPU : public HashTSDFVolume +{ +public: + // dimension in voxels, size in meters + HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight, + float _truncateThreshold, int _volumeUnitRes, bool zFirstMemOrder = true); + + HashTSDFVolumeCPU(const VolumeParams& _volumeParams, bool zFirstMemOrder = true); + + void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const kinfu::Intr& intrinsics, + const int frameId = 0) override; + void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize, OutputArray points, + OutputArray normals) const override; + + void fetchNormals(InputArray points, OutputArray _normals) const override; + void fetchPointsNormals(OutputArray points, OutputArray normals) const override; + + void reset() override; + size_t getTotalVolumeUnits() const override { return volumeUnits.size(); } + int getVisibleBlocks(int currFrameId, int frameThreshold) const override; + + //! Return the voxel given the voxel index in the universal volume (1 unit = 1 voxel_length) + TsdfVoxel at(const Vec3i& volumeIdx) const; + + //! Return the voxel given the point in volume coordinate system i.e., (metric scale 1 unit = + //! 1m) + virtual TsdfVoxel at(const cv::Point3f& point) const; + virtual TsdfVoxel _at(const cv::Vec3i& volumeIdx, int indx) const; + + TsdfVoxel atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it) const; + + float interpolateVoxelPoint(const Point3f& point) const; + float interpolateVoxel(const cv::Point3f& point) const; + Point3f getNormalVoxel(const cv::Point3f& p) const; + + //! Utility functions for coordinate transformations + Vec3i volumeToVolumeUnitIdx(const Point3f& point) const; + Point3f volumeUnitIdxToVolume(const Vec3i& volumeUnitIdx) const; + + Point3f voxelCoordToVolume(const Vec3i& voxelIdx) const; + Vec3i volumeToVoxelCoord(const Point3f& point) const; + +public: + Vec6f frameParams; + Mat pixNorms; + VolumeUnitIndexes volumeUnits; + cv::Mat volUnitsData; + int lastVolIndex; +}; + + +HashTSDFVolumeCPU::HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, + int _maxWeight, float _truncateThreshold, int _volumeUnitRes, bool _zFirstMemOrder) + :HashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold, _volumeUnitRes, + _zFirstMemOrder) +{ reset(); } HashTSDFVolumeCPU::HashTSDFVolumeCPU(const VolumeParams& _params, bool _zFirstMemOrder) - : HashTSDFVolume(_params.voxelSize, _params.pose.matrix, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight, + : HashTSDFVolumeCPU(_params.voxelSize, _params.pose.matrix, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight, _params.depthTruncThreshold, _params.unitResolution, _zFirstMemOrder) { } + // zero volume, leave rest params the same void HashTSDFVolumeCPU::reset() { CV_TRACE_FUNCTION(); lastVolIndex = 0; volUnitsData = cv::Mat(VOLUMES_SIZE, volumeUnitResolution * volumeUnitResolution * volumeUnitResolution, rawType()); + frameParams = Vec6f(); + pixNorms = Mat(); + volumeUnits = VolumeUnitIndexes(); } void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics, const int frameId) @@ -81,7 +175,7 @@ void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Ma Depth depth = _depth.getMat(); //! Compute volumes to be allocated - const int depthStride = int(log2(volumeUnitResolution)); + const int depthStride = volumeUnitDegree; const float invDepthFactor = 1.f / depthFactor; const Intr::Reprojector reproj(intrinsics.makeReprojector()); const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose)); @@ -111,7 +205,7 @@ void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Ma for (int k = lower_bound[2]; k <= upper_bound[2]; k++) { const Vec3i tsdf_idx = Vec3i(i, j, k); - if (!localAccessVolUnits.count(tsdf_idx)) + if (localAccessVolUnits.count(tsdf_idx) <= 0 && this->volumeUnits.count(tsdf_idx) <= 0) { //! This volume unit will definitely be required for current integration localAccessVolUnits.emplace(tsdf_idx); @@ -124,10 +218,9 @@ void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Ma for (const auto& tsdf_idx : localAccessVolUnits) { //! If the insert into the global set passes - if (!this->volumeUnits.count(tsdf_idx)) + if (!newIndices.count(tsdf_idx)) { // Volume allocation can be performed outside of the lock - this->volumeUnits.emplace(tsdf_idx, VolumeUnit()); newIndices.emplace(tsdf_idx); } } @@ -138,12 +231,13 @@ void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Ma //! Perform the allocation for (auto idx : newIndices) { - VolumeUnit& vu = volumeUnits[idx]; + VolumeUnit& vu = this->volumeUnits.emplace(idx, VolumeUnit()).first->second; + Matx44f subvolumePose = pose.translate(volumeUnitIdxToVolume(idx)).matrix; vu.pose = subvolumePose; vu.index = lastVolIndex; lastVolIndex++; - if (lastVolIndex > VolumeIndex(volUnitsData.size().height)) + if (lastVolIndex > int(volUnitsData.size().height)) { volUnitsData.resize((lastVolIndex - 1) * 2); } @@ -175,14 +269,14 @@ void HashTSDFVolumeCPU::integrate(InputArray _depth, float depthFactor, const Ma Vec3i tsdf_idx = totalVolUnits[i]; VolumeUnitIndexes::iterator it = volumeUnits.find(tsdf_idx); if (it == volumeUnits.end()) - return; + continue; Point3f volumeUnitPos = volumeUnitIdxToVolume(it->first); Point3f volUnitInCamSpace = vol2cam * volumeUnitPos; if (volUnitInCamSpace.z < 0 || volUnitInCamSpace.z > truncateThreshold) { it->second.isActive = false; - return; + continue; } Point2f cameraPoint = proj(volUnitInCamSpace); if (cameraPoint.x >= 0 && cameraPoint.y >= 0 && cameraPoint.x < depth.cols && cameraPoint.y < depth.rows) @@ -250,17 +344,14 @@ cv::Vec3i HashTSDFVolumeCPU::volumeToVoxelCoord(const cv::Point3f& point) const cvFloor(point.z * voxelSizeInv)); } -inline TsdfVoxel HashTSDFVolumeCPU::_at(const cv::Vec3i& volumeIdx, VolumeIndex indx) const +inline TsdfVoxel HashTSDFVolumeCPU::_at(const cv::Vec3i& volumeIdx, int indx) const { //! Out of bounds if ((volumeIdx[0] >= volumeUnitResolution || volumeIdx[0] < 0) || (volumeIdx[1] >= volumeUnitResolution || volumeIdx[1] < 0) || (volumeIdx[2] >= volumeUnitResolution || volumeIdx[2] < 0)) { - TsdfVoxel dummy; - dummy.tsdf = floatToTsdf(1.0f); - dummy.weight = 0; - return dummy; + return TsdfVoxel(floatToTsdf(1.f), 0); } const TsdfVoxel* volData = volUnitsData.ptr(indx); @@ -270,25 +361,21 @@ inline TsdfVoxel HashTSDFVolumeCPU::_at(const cv::Vec3i& volumeIdx, VolumeIndex } inline TsdfVoxel HashTSDFVolumeCPU::at(const cv::Vec3i& volumeIdx) const - { - Vec3i volumeUnitIdx = Vec3i(cvFloor(volumeIdx[0] / volumeUnitResolution), - cvFloor(volumeIdx[1] / volumeUnitResolution), - cvFloor(volumeIdx[2] / volumeUnitResolution)); + Vec3i volumeUnitIdx = Vec3i(volumeIdx[0] >> volumeUnitDegree, + volumeIdx[1] >> volumeUnitDegree, + volumeIdx[2] >> volumeUnitDegree); VolumeUnitIndexes::const_iterator it = volumeUnits.find(volumeUnitIdx); if (it == volumeUnits.end()) { - TsdfVoxel dummy; - dummy.tsdf = floatToTsdf(1.f); - dummy.weight = 0; - return dummy; + return TsdfVoxel(floatToTsdf(1.f), 0); } - cv::Vec3i volUnitLocalIdx = volumeIdx - cv::Vec3i(volumeUnitIdx[0] * volumeUnitResolution, - volumeUnitIdx[1] * volumeUnitResolution, - volumeUnitIdx[2] * volumeUnitResolution); + cv::Vec3i volUnitLocalIdx = volumeIdx - cv::Vec3i(volumeUnitIdx[0] << volumeUnitDegree, + volumeUnitIdx[1] << volumeUnitDegree, + volumeUnitIdx[2] << volumeUnitDegree); volUnitLocalIdx = cv::Vec3i(abs(volUnitLocalIdx[0]), abs(volUnitLocalIdx[1]), abs(volUnitLocalIdx[2])); @@ -298,15 +385,12 @@ inline TsdfVoxel HashTSDFVolumeCPU::at(const cv::Vec3i& volumeIdx) const TsdfVoxel HashTSDFVolumeCPU::at(const Point3f& point) const { - cv::Vec3i volumeUnitIdx = volumeToVolumeUnitIdx(point); + cv::Vec3i volumeUnitIdx = volumeToVolumeUnitIdx(point); VolumeUnitIndexes::const_iterator it = volumeUnits.find(volumeUnitIdx); if (it == volumeUnits.end()) { - TsdfVoxel dummy; - dummy.tsdf = floatToTsdf(1.f); - dummy.weight = 0; - return dummy; + return TsdfVoxel(floatToTsdf(1.f), 0); } cv::Point3f volumeUnitPos = volumeUnitIdxToVolume(volumeUnitIdx); @@ -316,32 +400,15 @@ TsdfVoxel HashTSDFVolumeCPU::at(const Point3f& point) const return _at(volUnitLocalIdx, it->second.index); } -static inline Vec3i voxelToVolumeUnitIdx(const Vec3i& pt, const int vuRes) -{ - if (!(vuRes & (vuRes - 1))) - { - // vuRes is a power of 2, let's get this power - const int p2 = trailingZeros32(vuRes); - return Vec3i(pt[0] >> p2, pt[1] >> p2, pt[2] >> p2); - } - else - { - return Vec3i(cvFloor(float(pt[0]) / vuRes), - cvFloor(float(pt[1]) / vuRes), - cvFloor(float(pt[2]) / vuRes)); - } -} - TsdfVoxel HashTSDFVolumeCPU::atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it) const { if (it == volumeUnits.end()) { - TsdfVoxel dummy; - dummy.tsdf = floatToTsdf(1.f); - dummy.weight = 0; - return dummy; + return TsdfVoxel(floatToTsdf(1.f), 0); } - Vec3i volUnitLocalIdx = point - volumeUnitIdx * volumeUnitResolution; + Vec3i volUnitLocalIdx = point - Vec3i(volumeUnitIdx[0] << volumeUnitDegree, + volumeUnitIdx[1] << volumeUnitDegree, + volumeUnitIdx[2] << volumeUnitDegree); // expanding at(), removing bounds check const TsdfVoxel* volData = volUnitsData.ptr(it->second.index); @@ -349,8 +416,6 @@ TsdfVoxel HashTSDFVolumeCPU::atVolumeUnit(const Vec3i& point, const Vec3i& volum return volData[coordBase]; } - - #if USE_INTRINSICS inline float interpolate(float tx, float ty, float tz, float vx[8]) { @@ -413,7 +478,7 @@ float HashTSDFVolumeCPU::interpolateVoxelPoint(const Point3f& point) const { Vec3i pt = iv + neighbourCoords[i]; - Vec3i volumeUnitIdx = voxelToVolumeUnitIdx(pt, volumeUnitResolution); + Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree); int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4; auto it = iterMap[dictIdx]; if (!queried[dictIdx]) @@ -475,7 +540,7 @@ Point3f HashTSDFVolumeCPU::getNormalVoxel(const Point3f &point) const { Vec3i pt = iptVox + offsets[i]; - Vec3i volumeUnitIdx = voxelToVolumeUnitIdx(pt, volumeUnitResolution); + Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree); int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4; auto it = iterMap[dictIdx]; @@ -625,7 +690,6 @@ void HashTSDFVolumeCPU::raycast(const Matx44f& cameraPose, const kinfu::Intr& in cv::Vec3i(std::numeric_limits::min(), std::numeric_limits::min(), std::numeric_limits::min()); - float tprev = tcurr; float prevTsdf = volume.truncDist; Ptr currVolumeUnit; @@ -634,7 +698,6 @@ void HashTSDFVolumeCPU::raycast(const Matx44f& cameraPose, const kinfu::Intr& in Point3f currRayPos = orig + tcurr * rayDirV; cv::Vec3i currVolumeUnitIdx = volume.volumeToVolumeUnitIdx(currRayPos); - VolumeUnitIndexes::const_iterator it = volume.volumeUnits.find(currVolumeUnitIdx); float currTsdf = prevTsdf; @@ -650,7 +713,6 @@ void HashTSDFVolumeCPU::raycast(const Matx44f& cameraPose, const kinfu::Intr& in volume.volumeUnitIdxToVolume(currVolumeUnitIdx); volUnitLocalIdx = volume.volumeToVoxelCoord(currRayPos - currVolUnitPos); - //! TODO: Figure out voxel interpolation TsdfVoxel currVoxel = _at(volUnitLocalIdx, it->second.index); currTsdf = tsdfToFloat(currVoxel.tsdf); @@ -708,17 +770,13 @@ void HashTSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _nor bool needNormals(_normals.needed()); Mutex mutex; - auto HashFetchPointsNormalsInvoker = [&](const Range& range) { - - std::vector points, normals; for (int i = range.start; i < range.end; i++) { cv::Vec3i tsdf_idx = totalVolUnits[i]; - VolumeUnitIndexes::const_iterator it = volume.volumeUnits.find(tsdf_idx); Point3f base_point = volume.volumeUnitIdxToVolume(tsdf_idx); if (it != volume.volumeUnits.end()) @@ -753,8 +811,6 @@ void HashTSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _nor parallel_for_(fetchRange, HashFetchPointsNormalsInvoker, nstripes); - - std::vector points, normals; for (size_t i = 0; i < pVecs.size(); i++) { @@ -817,5 +873,920 @@ int HashTSDFVolumeCPU::getVisibleBlocks(int currFrameId, int frameThreshold) con return numVisibleBlocks; } + +///////// GPU implementation ///////// + +#ifdef HAVE_OPENCL + +class HashTSDFVolumeGPU : public HashTSDFVolume +{ +public: + HashTSDFVolumeGPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight, + float _truncateThreshold, int _volumeUnitRes, bool zFirstMemOrder = false); + + HashTSDFVolumeGPU(const VolumeParams& _volumeParams, bool zFirstMemOrder = false); + + void reset() override; + + void integrateAllVolumeUnitsGPU(const UMat& depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics); + + void allocateVolumeUnits(const UMat& depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics); + + void markActive(const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId); + + void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const kinfu::Intr& intrinsics, + const int frameId = 0) override; + void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize, OutputArray points, + OutputArray normals) const override; + + + void fetchNormals(InputArray points, OutputArray _normals) const override; + void fetchPointsNormals(OutputArray points, OutputArray normals) const override; + + size_t getTotalVolumeUnits() const override { return size_t(hashTable.last); } + int getVisibleBlocks(int currFrameId, int frameThreshold) const override; + + + + //! Return the voxel given the point in volume coordinate system i.e., (metric scale 1 unit = + //! 1m) + virtual TsdfVoxel new_at(const cv::Vec3i& volumeIdx, int indx) const; + TsdfVoxel new_atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, int indx) const; + + + float interpolateVoxelPoint(const Point3f& point) const; + float interpolateVoxel(const cv::Point3f& point) const; + Point3f getNormalVoxel(const cv::Point3f& p) const; + + //! Utility functions for coordinate transformations + Vec3i volumeToVolumeUnitIdx(const Point3f& point) const; + Point3f volumeUnitIdxToVolume(const Vec3i& volumeUnitIdx) const; + + Point3f voxelCoordToVolume(const Vec3i& voxelIdx) const; + Vec3i volumeToVoxelCoord(const Point3f& point) const; + +public: + Vec6f frameParams; + int bufferSizeDegree; + + // per-volume-unit data + cv::UMat lastVisibleIndices; + + cv::UMat isActiveFlags; + + cv::UMat volUnitsData; + //TODO: remove it when there's no CPU parts + cv::Mat volUnitsDataCopy; + + cv::UMat pixNorms; + + //TODO: move indexes.volumes to GPU + CustomHashSet hashTable; +}; + +HashTSDFVolumeGPU::HashTSDFVolumeGPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight, + float _truncateThreshold, int _volumeUnitRes, bool _zFirstMemOrder) + :HashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold, _volumeUnitRes, _zFirstMemOrder) +{ + if (truncDist >= volumeUnitSize) + { + std::cout << "truncDist exceeds volume unit size, allocation may work incorrectly" << std::endl; + } + + reset(); +} + +HashTSDFVolumeGPU::HashTSDFVolumeGPU(const VolumeParams & _params, bool _zFirstMemOrder) + : HashTSDFVolumeGPU(_params.voxelSize, _params.pose.matrix, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight, + _params.depthTruncThreshold, _params.unitResolution, _zFirstMemOrder) +{ +} + +// zero volume, leave rest params the same +void HashTSDFVolumeGPU::reset() +{ + CV_TRACE_FUNCTION(); + + bufferSizeDegree = 15; + int buff_lvl = (int) (1 << bufferSizeDegree); + + int volCubed = volumeUnitResolution * volumeUnitResolution * volumeUnitResolution; + volUnitsDataCopy = cv::Mat(buff_lvl, volCubed, rawType()); + + volUnitsData = cv::UMat(buff_lvl, volCubed, CV_8UC2); + + lastVisibleIndices = cv::UMat(buff_lvl, 1, CV_32S); + + isActiveFlags = cv::UMat(buff_lvl, 1, CV_8U); + + hashTable = CustomHashSet(); + + frameParams = Vec6f(); + pixNorms = UMat(); +} + + +void HashTSDFVolumeGPU::integrateAllVolumeUnitsGPU(const UMat& depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics) +{ + CV_TRACE_FUNCTION(); + CV_Assert(!depth.empty()); + + String errorStr; + String name = "integrateAllVolumeUnits"; + ocl::ProgramSource source = ocl::rgbd::hash_tsdf_oclsrc; + String options = "-cl-mad-enable"; + ocl::Kernel k; + k.create(name.c_str(), source, options, &errorStr); + + if (k.empty()) + throw std::runtime_error("Failed to create kernel: " + errorStr); + + float dfac = 1.f / depthFactor; + Vec2f fxy(intrinsics.fx, intrinsics.fy), cxy(intrinsics.cx, intrinsics.cy); + Matx44f vol2camMatrix = (Affine3f(cameraPose).inv() * pose).matrix; + Matx44f camInvMatrix = Affine3f(cameraPose).inv().matrix; + + UMat hashesGpu = Mat(hashTable.hashes, false).getUMat(ACCESS_READ); + UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ); + + k.args(ocl::KernelArg::ReadOnly(depth), + ocl::KernelArg::PtrReadOnly(hashesGpu), + ocl::KernelArg::PtrReadOnly(hashDataGpu), + ocl::KernelArg::ReadWrite(volUnitsData), + ocl::KernelArg::ReadOnly(pixNorms), + ocl::KernelArg::ReadOnly(isActiveFlags), + vol2camMatrix, + camInvMatrix, + voxelSize, + volumeUnitResolution, + volStrides.val, + fxy.val, + cxy.val, + dfac, + truncDist, + int(maxWeight) + ); + + int resol = volumeUnitResolution; + size_t globalSize[3]; + globalSize[0] = (size_t)resol; + globalSize[1] = (size_t)resol; + globalSize[2] = (size_t)hashTable.last; // num of volume units + + if (!k.run(3, globalSize, NULL, true)) + throw std::runtime_error("Failed to run kernel"); +} + + +void HashTSDFVolumeGPU::allocateVolumeUnits(const UMat& _depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics) +{ + constexpr int pixCapacity = 16; + typedef std::array LocalVolUnits; + + Depth depth = _depth.getMat(ACCESS_READ); + + //! Compute volumes to be allocated + const int depthStride = volumeUnitDegree; + const float invDepthFactor = 1.f / depthFactor; + const Intr::Reprojector reproj(intrinsics.makeReprojector()); + const Affine3f cam2vol(pose.inv() * Affine3f(cameraPose)); + const Point3f truncPt(truncDist, truncDist, truncDist); + Mutex mutex; + + // for new indices + CustomHashSet thm; + + auto fillLocalAcessVolUnits = [&](const Range& xrange, const Range& yrange, CustomHashSet& ghm) + { + for (int y = yrange.start; y < yrange.end; y += depthStride) + { + const depthType* depthRow = depth[y]; + for (int x = xrange.start; x < xrange.end; x += depthStride) + { + depthType z = depthRow[x] * invDepthFactor; + if (z <= 0 || z > this->truncateThreshold) + continue; + Point3f camPoint = reproj(Point3f((float)x, (float)y, z)); + Point3f volPoint = cam2vol * camPoint; + //! Find accessed TSDF volume unit for valid 3D vertex + Vec3i lower_bound = this->volumeToVolumeUnitIdx(volPoint - truncPt); + Vec3i upper_bound = this->volumeToVolumeUnitIdx(volPoint + truncPt); + + int pixLocalCounter = 0; + LocalVolUnits pixLocalVolUnits; + for (int i = lower_bound[0]; i <= upper_bound[0]; i++) + for (int j = lower_bound[1]; j <= upper_bound[1]; j++) + for (int k = lower_bound[2]; k <= upper_bound[2]; k++) + { + const Vec3i tsdf_idx = Vec3i(i, j, k); + + if (hashTable.find(tsdf_idx) < 0) + { + bool found = false; + for (int c = 0; c < pixLocalCounter; c++) + { + if (pixLocalVolUnits[c] == tsdf_idx) + { + found = true; break; + } + } + if (!found) + { + pixLocalVolUnits[pixLocalCounter++] = tsdf_idx; + if (pixLocalCounter >= pixCapacity) + { + return; + } + } + } + } + + // lock localAccessVolUnits somehow + for (int i = 0; i < pixLocalCounter; i++) + { + Vec3i idx = pixLocalVolUnits[i]; + if (!ghm.insert(idx)) + { + //return; + } + } + // unlock + } + } + }; + + Rect dim(0, 0, depth.cols, depth.rows); + Size gsz(32, 32); + Size gg(divUp(dim.width, gsz.width), divUp(dim.height, gsz.height)); + + bool needReallocation = false; + auto allocateLambda = [&](const Range& r) + { + + for (int yg = r.start; yg < r.end; yg++) + { + for (int xg = 0; xg < gg.width; xg++) + { + Rect gr(xg * gsz.width, yg * gsz.height, (xg + 1) * gsz.width, (yg + 1) * gsz.height); + gr = gr & dim; + Range xr(gr.tl().x, gr.br().x), yr(gr.tl().y, gr.br().y); + + CustomHashSet ghm; + + fillLocalAcessVolUnits(xr, yr, ghm); + + if (ghm.last) + { + std::lock_guard al(mutex); + + for (int i = 0; i < ghm.last; i++) + { + Vec4i node = ghm.data[i]; + Vec3i idx(node[0], node[1], node[2]); + + //TODO: 1. add to separate hash map instead, then merge on GPU side + + int result = thm.insert(idx); + if (!result) + { + needReallocation = true; + return; + } + } + } + } + } + + }; + + do + { + if (needReallocation) + { + thm.capacity *= 2; + thm.data.resize(thm.capacity); + + needReallocation = false; + } + + parallel_for_(Range(0, gg.height), allocateLambda); + } while (needReallocation); + + + auto pushToGlobal = [](const CustomHashSet _thm, CustomHashSet& _globalHashMap, + bool& _needReallocation, Mutex& _mutex) + { + for (int i = 0; i < _thm.last; i++) + { + Vec4i node = _thm.data[i]; + Vec3i idx(node[0], node[1], node[2]); + + std::lock_guard al(_mutex); + + int result = _globalHashMap.insert(idx); + if (result == 0) + { + _needReallocation = true; + return; + } + } + }; + + needReallocation = false; + do + { + if (needReallocation) + { + hashTable.capacity *= 2; + hashTable.data.resize(hashTable.capacity); + + needReallocation = false; + } + + pushToGlobal(thm, hashTable, needReallocation, mutex); + } while (needReallocation); +} + + +void HashTSDFVolumeGPU::markActive(const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId) +{ + //! Mark volumes in the camera frustum as active + String errorStr; + String name = "markActive"; + ocl::ProgramSource source = ocl::rgbd::hash_tsdf_oclsrc; + String options = "-cl-mad-enable"; + ocl::Kernel k; + k.create(name.c_str(), source, options, &errorStr); + + if (k.empty()) + throw std::runtime_error("Failed to create kernel: " + errorStr); + + const Affine3f vol2cam(Affine3f(cameraPose.inv()) * pose); + const Intr::Projector proj(intrinsics.makeProjector()); + Vec2f fxy(proj.fx, proj.fy), cxy(proj.cx, proj.cy); + + UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ); + + k.args( + ocl::KernelArg::PtrReadOnly(hashDataGpu), + ocl::KernelArg::WriteOnly(isActiveFlags), + ocl::KernelArg::WriteOnly(lastVisibleIndices), + vol2cam.matrix, + fxy, + cxy, + frameSz, + volumeUnitSize, + hashTable.last, + truncateThreshold, + frameId + ); + + size_t globalSize[1] = { (size_t)hashTable.last }; + if (!k.run(1, globalSize, nullptr, true)) + throw std::runtime_error("Failed to run kernel"); +} + + +void HashTSDFVolumeGPU::integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics, const int frameId) +{ + CV_TRACE_FUNCTION(); + + CV_Assert(_depth.type() == DEPTH_TYPE); + UMat depth = _depth.getUMat(); + + // Save length to fill new data in ranges + int sizeBefore = hashTable.last; + allocateVolumeUnits(depth, depthFactor, cameraPose, intrinsics); + int sizeAfter = hashTable.last; + //! Perform the allocation + + // Grow buffers + int buff_lvl = (int)(1 << bufferSizeDegree); + if (sizeAfter >= buff_lvl) + { + bufferSizeDegree = (int)(log2(sizeAfter) + 1); // clz() would be better + int oldBuffSize = buff_lvl; + buff_lvl = (int)pow(2, bufferSizeDegree); + + volUnitsDataCopy.resize(buff_lvl); + + Range oldr(0, oldBuffSize); + int volCubed = volumeUnitResolution * volumeUnitResolution * volumeUnitResolution; + UMat newData(buff_lvl, volCubed, CV_8UC2); + volUnitsData.copyTo(newData.rowRange(oldr)); + volUnitsData = newData; + + UMat newLastVisibleIndices(buff_lvl, 1, CV_32S); + lastVisibleIndices.copyTo(newLastVisibleIndices.rowRange(oldr)); + lastVisibleIndices = newLastVisibleIndices; + + UMat newIsActiveFlags(buff_lvl, 1, CV_8U); + isActiveFlags.copyTo(newIsActiveFlags.rowRange(oldr)); + isActiveFlags = newIsActiveFlags; + } + + // Fill data for new volume units + Range r(sizeBefore, sizeAfter); + if (r.start < r.end) + { + lastVisibleIndices.rowRange(r) = frameId; + isActiveFlags.rowRange(r) = 1; + + TsdfVoxel emptyVoxel(floatToTsdf(0.0f), 0); + volUnitsData.rowRange(r) = Vec2b((uchar)(emptyVoxel.tsdf), (uchar)(emptyVoxel.weight)); + } + + //! Mark volumes in the camera frustum as active + markActive(cameraPose, intrinsics, depth.size(), frameId); + + Vec6f newParams((float)depth.rows, (float)depth.cols, + intrinsics.fx, intrinsics.fy, + intrinsics.cx, intrinsics.cy); + if (!(frameParams == newParams)) + { + frameParams = newParams; + pixNorms = preCalculationPixNormGPU(depth, intrinsics); + } + + //! Integrate the correct volumeUnits + integrateAllVolumeUnitsGPU(depth, depthFactor, cameraPose, intrinsics); +} + + +//TODO: remove these functions when everything is done on GPU +cv::Vec3i HashTSDFVolumeGPU::volumeToVolumeUnitIdx(const cv::Point3f& p) const +{ + return cv::Vec3i(cvFloor(p.x / volumeUnitSize), cvFloor(p.y / volumeUnitSize), + cvFloor(p.z / volumeUnitSize)); +} + +cv::Point3f HashTSDFVolumeGPU::volumeUnitIdxToVolume(const cv::Vec3i& volumeUnitIdx) const +{ + return cv::Point3f(volumeUnitIdx[0] * volumeUnitSize, volumeUnitIdx[1] * volumeUnitSize, + volumeUnitIdx[2] * volumeUnitSize); +} + +cv::Point3f HashTSDFVolumeGPU::voxelCoordToVolume(const cv::Vec3i& voxelIdx) const +{ + return cv::Point3f(voxelIdx[0] * voxelSize, voxelIdx[1] * voxelSize, voxelIdx[2] * voxelSize); +} + +cv::Vec3i HashTSDFVolumeGPU::volumeToVoxelCoord(const cv::Point3f& point) const +{ + return cv::Vec3i(cvFloor(point.x * voxelSizeInv), cvFloor(point.y * voxelSizeInv), + cvFloor(point.z * voxelSizeInv)); +} + +inline TsdfVoxel HashTSDFVolumeGPU::new_at(const cv::Vec3i& volumeIdx, int indx) const +{ + //! Out of bounds + if ((volumeIdx[0] >= volumeUnitResolution || volumeIdx[0] < 0) || + (volumeIdx[1] >= volumeUnitResolution || volumeIdx[1] < 0) || + (volumeIdx[2] >= volumeUnitResolution || volumeIdx[2] < 0)) + { + return TsdfVoxel(floatToTsdf(1.0f), 0); + } + + const TsdfVoxel* volData = volUnitsDataCopy.ptr(indx); + int coordBase = + volumeIdx[0] * volStrides[0] + + volumeIdx[1] * volStrides[1] + + volumeIdx[2] * volStrides[2]; + return volData[coordBase]; +} + +TsdfVoxel HashTSDFVolumeGPU::new_atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, int indx) const +{ + if (indx < 0) + { + return TsdfVoxel(floatToTsdf(1.f), 0); + } + Vec3i volUnitLocalIdx = point - Vec3i(volumeUnitIdx[0] << volumeUnitDegree, + volumeUnitIdx[1] << volumeUnitDegree, + volumeUnitIdx[2] << volumeUnitDegree); + + // expanding at(), removing bounds check + const TsdfVoxel* volData = volUnitsDataCopy.ptr(indx); + int coordBase = volUnitLocalIdx[0] * volStrides[0] + + volUnitLocalIdx[1] * volStrides[1] + + volUnitLocalIdx[2] * volStrides[2]; + return volData[coordBase]; +} + +float HashTSDFVolumeGPU::interpolateVoxelPoint(const Point3f& point) const +{ + const Vec3i local_neighbourCoords[] = { {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}, + {1, 0, 0}, {1, 0, 1}, {1, 1, 0}, {1, 1, 1} }; + + // A small hash table to reduce a number of find() calls + // -2 and lower means not queried yet + // -1 means not found + // 0+ means found + int iterMap[8]; + for (int i = 0; i < 8; i++) + { + iterMap[i] = -2; + } + + int ix = cvFloor(point.x); + int iy = cvFloor(point.y); + int iz = cvFloor(point.z); + + float tx = point.x - ix; + float ty = point.y - iy; + float tz = point.z - iz; + + Vec3i iv(ix, iy, iz); + float vx[8]; + for (int i = 0; i < 8; i++) + { + Vec3i pt = iv + local_neighbourCoords[i]; + + Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree); + int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4; + auto it = iterMap[dictIdx]; + if (it < -1) + { + it = hashTable.find(volumeUnitIdx); + iterMap[dictIdx] = it; + } + + vx[i] = new_atVolumeUnit(pt, volumeUnitIdx, it).tsdf; + } + + return interpolate(tx, ty, tz, vx); +} + +inline float HashTSDFVolumeGPU::interpolateVoxel(const cv::Point3f& point) const +{ + return interpolateVoxelPoint(point * voxelSizeInv); +} + +Point3f HashTSDFVolumeGPU::getNormalVoxel(const Point3f& point) const +{ + Vec3f normal = Vec3f(0, 0, 0); + + Point3f ptVox = point * voxelSizeInv; + Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z)); + + // A small hash table to reduce a number of find() calls + // -2 and lower means not queried yet + // -1 means not found + // 0+ means found + int iterMap[8]; + for (int i = 0; i < 8; i++) + { + iterMap[i] = -2; + } + +#if !USE_INTERPOLATION_IN_GETNORMAL + const Vec3i offsets[] = { { 1, 0, 0}, {-1, 0, 0}, { 0, 1, 0}, // 0-3 + { 0, -1, 0}, { 0, 0, 1}, { 0, 0, -1} // 4-7 + }; + const int nVals = 6; + +#else + const Vec3i offsets[] = { { 0, 0, 0}, { 0, 0, 1}, { 0, 1, 0}, { 0, 1, 1}, // 0-3 + { 1, 0, 0}, { 1, 0, 1}, { 1, 1, 0}, { 1, 1, 1}, // 4-7 + {-1, 0, 0}, {-1, 0, 1}, {-1, 1, 0}, {-1, 1, 1}, // 8-11 + { 2, 0, 0}, { 2, 0, 1}, { 2, 1, 0}, { 2, 1, 1}, // 12-15 + { 0, -1, 0}, { 0, -1, 1}, { 1, -1, 0}, { 1, -1, 1}, // 16-19 + { 0, 2, 0}, { 0, 2, 1}, { 1, 2, 0}, { 1, 2, 1}, // 20-23 + { 0, 0, -1}, { 0, 1, -1}, { 1, 0, -1}, { 1, 1, -1}, // 24-27 + { 0, 0, 2}, { 0, 1, 2}, { 1, 0, 2}, { 1, 1, 2}, // 28-31 + }; + const int nVals = 32; +#endif + + float vals[nVals]; + for (int i = 0; i < nVals; i++) + { + Vec3i pt = iptVox + offsets[i]; + + Vec3i volumeUnitIdx = Vec3i(pt[0] >> volumeUnitDegree, pt[1] >> volumeUnitDegree, pt[2] >> volumeUnitDegree); + + int dictIdx = (volumeUnitIdx[0] & 1) + (volumeUnitIdx[1] & 1) * 2 + (volumeUnitIdx[2] & 1) * 4; + auto it = iterMap[dictIdx]; + if (it < -1) + { + it = hashTable.find(volumeUnitIdx); + iterMap[dictIdx] = it; + } + + vals[i] = tsdfToFloat(new_atVolumeUnit(pt, volumeUnitIdx, it).tsdf); + } + +#if !USE_INTERPOLATION_IN_GETNORMAL + for (int c = 0; c < 3; c++) + { + normal[c] = vals[c * 2] - vals[c * 2 + 1]; + } +#else + + float cxv[8], cyv[8], czv[8]; + + // How these numbers were obtained: + // 1. Take the basic interpolation sequence: + // 000, 001, 010, 011, 100, 101, 110, 111 + // where each digit corresponds to shift by x, y, z axis respectively. + // 2. Add +1 for next or -1 for prev to each coordinate to corresponding axis + // 3. Search corresponding values in offsets + const int idxxp[8] = { 8, 9, 10, 11, 0, 1, 2, 3 }; + const int idxxn[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; + const int idxyp[8] = { 16, 17, 0, 1, 18, 19, 4, 5 }; + const int idxyn[8] = { 2, 3, 20, 21, 6, 7, 22, 23 }; + const int idxzp[8] = { 24, 0, 25, 2, 26, 4, 27, 6 }; + const int idxzn[8] = { 1, 28, 3, 29, 5, 30, 7, 31 }; + +#if !USE_INTRINSICS + for (int i = 0; i < 8; i++) + { + cxv[i] = vals[idxxn[i]] - vals[idxxp[i]]; + cyv[i] = vals[idxyn[i]] - vals[idxyp[i]]; + czv[i] = vals[idxzn[i]] - vals[idxzp[i]]; + } +#else + +# if CV_SIMD >= 32 + v_float32x8 cxp = v_lut(vals, idxxp); + v_float32x8 cxn = v_lut(vals, idxxn); + + v_float32x8 cyp = v_lut(vals, idxyp); + v_float32x8 cyn = v_lut(vals, idxyn); + + v_float32x8 czp = v_lut(vals, idxzp); + v_float32x8 czn = v_lut(vals, idxzn); + + v_float32x8 vcxv = cxn - cxp; + v_float32x8 vcyv = cyn - cyp; + v_float32x8 vczv = czn - czp; + + v_store(cxv, vcxv); + v_store(cyv, vcyv); + v_store(czv, vczv); +# else + v_float32x4 cxp0 = v_lut(vals, idxxp + 0); v_float32x4 cxp1 = v_lut(vals, idxxp + 4); + v_float32x4 cxn0 = v_lut(vals, idxxn + 0); v_float32x4 cxn1 = v_lut(vals, idxxn + 4); + + v_float32x4 cyp0 = v_lut(vals, idxyp + 0); v_float32x4 cyp1 = v_lut(vals, idxyp + 4); + v_float32x4 cyn0 = v_lut(vals, idxyn + 0); v_float32x4 cyn1 = v_lut(vals, idxyn + 4); + + v_float32x4 czp0 = v_lut(vals, idxzp + 0); v_float32x4 czp1 = v_lut(vals, idxzp + 4); + v_float32x4 czn0 = v_lut(vals, idxzn + 0); v_float32x4 czn1 = v_lut(vals, idxzn + 4); + + v_float32x4 cxv0 = cxn0 - cxp0; v_float32x4 cxv1 = cxn1 - cxp1; + v_float32x4 cyv0 = cyn0 - cyp0; v_float32x4 cyv1 = cyn1 - cyp1; + v_float32x4 czv0 = czn0 - czp0; v_float32x4 czv1 = czn1 - czp1; + + v_store(cxv + 0, cxv0); v_store(cxv + 4, cxv1); + v_store(cyv + 0, cyv0); v_store(cyv + 4, cyv1); + v_store(czv + 0, czv0); v_store(czv + 4, czv1); +#endif + +#endif + + float tx = ptVox.x - iptVox[0]; + float ty = ptVox.y - iptVox[1]; + float tz = ptVox.z - iptVox[2]; + + normal[0] = interpolate(tx, ty, tz, cxv); + normal[1] = interpolate(tx, ty, tz, cyv); + normal[2] = interpolate(tx, ty, tz, czv); +#endif + float nv = sqrt(normal[0] * normal[0] + + normal[1] * normal[1] + + normal[2] * normal[2]); + return nv < 0.0001f ? nan3 : normal / nv; +} + + +void HashTSDFVolumeGPU::raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize, + OutputArray _points, OutputArray _normals) const +{ + CV_TRACE_FUNCTION(); + CV_Assert(frameSize.area() > 0); + + String errorStr; + String name = "raycast"; + ocl::ProgramSource source = ocl::rgbd::hash_tsdf_oclsrc; + String options = "-cl-mad-enable"; + ocl::Kernel k; + k.create(name.c_str(), source, options, &errorStr); + + if (k.empty()) + throw std::runtime_error("Failed to create kernel: " + errorStr); + + _points.create(frameSize, CV_32FC4); + _normals.create(frameSize, CV_32FC4); + + UMat points = _points.getUMat(); + UMat normals = _normals.getUMat(); + + Intr::Reprojector r = intrinsics.makeReprojector(); + Vec2f finv(r.fxinv, r.fyinv), cxy(r.cx, r.cy); + + Vec4f boxMin, boxMax(volumeUnitSize - voxelSize, + volumeUnitSize - voxelSize, + volumeUnitSize - voxelSize); + + float tstep = truncDist * raycastStepFactor; + + const HashTSDFVolumeGPU& volume(*this); + const Affine3f cam2vol(volume.pose.inv() * Affine3f(cameraPose)); + const Affine3f vol2cam(Affine3f(cameraPose.inv()) * volume.pose); + + Matx44f cam2volRotGPU = cam2vol.matrix; + Matx44f vol2camRotGPU = vol2cam.matrix; + + UMat volPoseGpu = Mat(pose.matrix).getUMat(ACCESS_READ); + UMat invPoseGpu = Mat(pose.inv().matrix).getUMat(ACCESS_READ); + + UMat hashesGpu = Mat(hashTable.hashes, false).getUMat(ACCESS_READ); + UMat hashDataGpu = Mat(hashTable.data, false).getUMat(ACCESS_READ); + + k.args( + ocl::KernelArg::PtrReadOnly(hashesGpu), + ocl::KernelArg::PtrReadOnly(hashDataGpu), + ocl::KernelArg::WriteOnlyNoSize(points), + ocl::KernelArg::WriteOnlyNoSize(normals), + frameSize, + ocl::KernelArg::ReadOnly(volUnitsData), + cam2volRotGPU, + vol2camRotGPU, + float(volume.truncateThreshold), + finv.val, cxy.val, + boxMin.val, boxMax.val, + tstep, + voxelSize, + voxelSizeInv, + volumeUnitSize, + volume.truncDist, + volumeUnitDegree, + volStrides + ); + + size_t globalSize[2]; + globalSize[0] = (size_t)frameSize.width; + globalSize[1] = (size_t)frameSize.height; + + if (!k.run(2, globalSize, NULL, true)) + throw std::runtime_error("Failed to run kernel"); +} + +void HashTSDFVolumeGPU::fetchPointsNormals(OutputArray _points, OutputArray _normals) const +{ + CV_TRACE_FUNCTION(); + + if (_points.needed()) + { + //TODO: remove it when it works w/o CPU code + volUnitsData.copyTo(volUnitsDataCopy); + //TODO: remove it when it works w/o CPU code + //TODO: enable it when it's on GPU + //UMat hashDataGpu(hashMap.capacity, 1, CV_32SC4); + //Mat(hashMap.data, false).copyTo(hashDataGpu); + + std::vector> pVecs, nVecs; + + Range _fetchRange(0, hashTable.last); + + const int nstripes = -1; + + const HashTSDFVolumeGPU& volume(*this); + bool needNormals(_normals.needed()); + Mutex mutex; + + auto _HashFetchPointsNormalsInvoker = [&](const Range& range) + { + std::vector points, normals; + for (int row = range.start; row < range.end; row++) + { + cv::Vec4i idx4 = hashTable.data[row]; + cv::Vec3i idx(idx4[0], idx4[1], idx4[2]); + + Point3f base_point = volume.volumeUnitIdxToVolume(idx); + + std::vector localPoints; + std::vector localNormals; + for (int x = 0; x < volume.volumeUnitResolution; x++) + for (int y = 0; y < volume.volumeUnitResolution; y++) + for (int z = 0; z < volume.volumeUnitResolution; z++) + { + cv::Vec3i voxelIdx(x, y, z); + TsdfVoxel voxel = new_at(voxelIdx, row); + + if (voxel.tsdf != -128 && voxel.weight != 0) + { + Point3f point = base_point + volume.voxelCoordToVolume(voxelIdx); + + localPoints.push_back(toPtype(point)); + if (needNormals) + { + Point3f normal = volume.getNormalVoxel(point); + localNormals.push_back(toPtype(normal)); + } + } + } + + AutoLock al(mutex); + pVecs.push_back(localPoints); + nVecs.push_back(localNormals); + } + }; + + parallel_for_(_fetchRange, _HashFetchPointsNormalsInvoker, nstripes); + + std::vector points, normals; + for (size_t i = 0; i < pVecs.size(); i++) + { + points.insert(points.end(), pVecs[i].begin(), pVecs[i].end()); + normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end()); + } + + _points.create((int)points.size(), 1, POINT_TYPE); + if (!points.empty()) + Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat()); + + if (_normals.needed()) + { + _normals.create((int)normals.size(), 1, POINT_TYPE); + if (!normals.empty()) + Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat()); + } + } +} + +void HashTSDFVolumeGPU::fetchNormals(InputArray _points, OutputArray _normals) const +{ + CV_TRACE_FUNCTION(); + + if (_normals.needed()) + { + //TODO: remove it when it works w/o CPU code + volUnitsData.copyTo(volUnitsDataCopy); + + Points points = _points.getMat(); + CV_Assert(points.type() == POINT_TYPE); + _normals.createSameSize(_points, _points.type()); + Normals normals = _normals.getMat(); + const HashTSDFVolumeGPU& _volume = *this; + auto HashPushNormals = [&](const ptype& point, const int* position) { + const HashTSDFVolumeGPU& volume(_volume); + Affine3f invPose(volume.pose.inv()); + Point3f p = fromPtype(point); + Point3f n = nan3; + if (!isNaN(p)) + { + Point3f voxelPoint = invPose * p; + n = volume.pose.rotation() * volume.getNormalVoxel(voxelPoint); + } + normals(position[0], position[1]) = toPtype(n); + }; + points.forEach(HashPushNormals); + } + +} + +int HashTSDFVolumeGPU::getVisibleBlocks(int currFrameId, int frameThreshold) const +{ + Mat cpuIndices = lastVisibleIndices.getMat(ACCESS_READ); + + int numVisibleBlocks = 0; + //! TODO: Iterate over map parallely? + for (int i = 0; i < hashTable.last; i++) + { + if (cpuIndices.at(i) > (currFrameId - frameThreshold)) + numVisibleBlocks++; + } + return numVisibleBlocks; +} + +#endif + +//template +Ptr makeHashTSDFVolume(const VolumeParams& _params) +{ +#ifdef HAVE_OPENCL + if (ocl::useOpenCL()) + return makePtr(_params.voxelSize, _params.pose.matrix, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight, + _params.depthTruncThreshold, _params.unitResolution); +#endif + return makePtr(_params.voxelSize, _params.pose.matrix, _params.raycastStepFactor, _params.tsdfTruncDist, _params.maxWeight, + _params.depthTruncThreshold, _params.unitResolution); +} + +//template +Ptr makeHashTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist, + int _maxWeight, float truncateThreshold, int volumeUnitResolution) +{ +#ifdef HAVE_OPENCL + if (ocl::useOpenCL()) + return makePtr(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, truncateThreshold, + volumeUnitResolution); +#endif + return makePtr(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, truncateThreshold, + volumeUnitResolution); +} + } // namespace kinfu } // namespace cv diff --git a/modules/rgbd/src/hash_tsdf.hpp b/modules/rgbd/src/hash_tsdf.hpp index 31bf026785b..34e73f30383 100644 --- a/modules/rgbd/src/hash_tsdf.hpp +++ b/modules/rgbd/src/hash_tsdf.hpp @@ -26,108 +26,26 @@ class HashTSDFVolume : public Volume virtual ~HashTSDFVolume() = default; + virtual int getVisibleBlocks(int currFrameId, int frameThreshold) const = 0; + virtual size_t getTotalVolumeUnits() const = 0; + public: int maxWeight; float truncDist; float truncateThreshold; int volumeUnitResolution; + int volumeUnitDegree; float volumeUnitSize; bool zFirstMemOrder; + Vec4i volStrides; }; -//! Spatial hashing -struct tsdf_hash -{ - size_t operator()(const Vec3i& x) const noexcept - { - size_t seed = 0; - constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9; - for (uint16_t i = 0; i < 3; i++) - { - seed ^= std::hash()(x[i]) + GOLDEN_RATIO + (seed << 6) + (seed >> 2); - } - return seed; - } -}; - -typedef unsigned int VolumeIndex; -struct VolumeUnit -{ - cv::Vec3i coord; - VolumeIndex index; - cv::Matx44f pose; - int lastVisibleIndex = 0; - bool isActive; -}; - -typedef std::unordered_set VolumeUnitIndexSet; -typedef std::unordered_map VolumeUnitIndexes; - -class HashTSDFVolumeCPU : public HashTSDFVolume -{ - public: - // dimension in voxels, size in meters - HashTSDFVolumeCPU(float _voxelSize, const Matx44f& _pose, float _raycastStepFactor, float _truncDist, int _maxWeight, - float _truncateThreshold, int _volumeUnitRes, bool zFirstMemOrder = true); - - HashTSDFVolumeCPU(const VolumeParams& _volumeParams, bool zFirstMemOrder = true); - - void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const kinfu::Intr& intrinsics, - const int frameId = 0) override; - void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize, OutputArray points, - OutputArray normals) const override; - - void fetchNormals(InputArray points, OutputArray _normals) const override; - void fetchPointsNormals(OutputArray points, OutputArray normals) const override; - - void reset() override; - size_t getTotalVolumeUnits() const { return volumeUnits.size(); } - int getVisibleBlocks(int currFrameId, int frameThreshold) const; - - //! Return the voxel given the voxel index in the universal volume (1 unit = 1 voxel_length) - TsdfVoxel at(const Vec3i& volumeIdx) const; - - //! Return the voxel given the point in volume coordinate system i.e., (metric scale 1 unit = - //! 1m) - virtual TsdfVoxel at(const cv::Point3f& point) const; - virtual TsdfVoxel _at(const cv::Vec3i& volumeIdx, VolumeIndex indx) const; - - TsdfVoxel atVolumeUnit(const Vec3i& point, const Vec3i& volumeUnitIdx, VolumeUnitIndexes::const_iterator it) const; - - - float interpolateVoxelPoint(const Point3f& point) const; - float interpolateVoxel(const cv::Point3f& point) const; - Point3f getNormalVoxel(const cv::Point3f& p) const; - - //! Utility functions for coordinate transformations - Vec3i volumeToVolumeUnitIdx(const Point3f& point) const; - Point3f volumeUnitIdxToVolume(const Vec3i& volumeUnitIdx) const; - - Point3f voxelCoordToVolume(const Vec3i& voxelIdx) const; - Vec3i volumeToVoxelCoord(const Point3f& point) const; - - public: - Vec4i volStrides; - Vec6f frameParams; - Mat pixNorms; - VolumeUnitIndexes volumeUnits; - cv::Mat volUnitsData; - VolumeIndex lastVolIndex; -}; - -template -Ptr makeHashTSDFVolume(const VolumeParams& _volumeParams) -{ - return makePtr(_volumeParams); -} - -template +//template +Ptr makeHashTSDFVolume(const VolumeParams& _volumeParams); +//template Ptr makeHashTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist, - int _maxWeight, float truncateThreshold, int volumeUnitResolution = 16) -{ - return makePtr(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, truncateThreshold, - volumeUnitResolution); -} + int _maxWeight, float truncateThreshold, int volumeUnitResolution = 16); + } // namespace kinfu } // namespace cv #endif diff --git a/modules/rgbd/src/large_kinfu.cpp b/modules/rgbd/src/large_kinfu.cpp index dedeabb82db..16006713c53 100644 --- a/modules/rgbd/src/large_kinfu.cpp +++ b/modules/rgbd/src/large_kinfu.cpp @@ -316,21 +316,21 @@ template void LargeKinfuImpl::getCloud(OutputArray p, OutputArray n) const { auto currSubmap = submapMgr->getCurrentSubmap(); - currSubmap->volume.fetchPointsNormals(p, n); + currSubmap->volume->fetchPointsNormals(p, n); } template void LargeKinfuImpl::getPoints(OutputArray points) const { auto currSubmap = submapMgr->getCurrentSubmap(); - currSubmap->volume.fetchPointsNormals(points, noArray()); + currSubmap->volume->fetchPointsNormals(points, noArray()); } template void LargeKinfuImpl::getNormals(InputArray points, OutputArray normals) const { auto currSubmap = submapMgr->getCurrentSubmap(); - currSubmap->volume.fetchNormals(points, normals); + currSubmap->volume->fetchNormals(points, normals); } // importing class diff --git a/modules/rgbd/src/opencl/hash_tsdf.cl b/modules/rgbd/src/opencl/hash_tsdf.cl new file mode 100644 index 00000000000..0e611c1d3dd --- /dev/null +++ b/modules/rgbd/src/opencl/hash_tsdf.cl @@ -0,0 +1,653 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory + +#define USE_INTERPOLATION_IN_GETNORMAL 1 +#define HASH_DIVISOR 32768 + +typedef char int8_t; +typedef uint int32_t; + +typedef int8_t TsdfType; +typedef uchar WeightType; + +struct TsdfVoxel +{ + TsdfType tsdf; + WeightType weight; +}; + + +static inline TsdfType floatToTsdf(float num) +{ + int8_t res = (int8_t) ( (num * (-128)) ); + res = res ? res : (num < 0 ? 1 : -1); + return res; +} + +static inline float tsdfToFloat(TsdfType num) +{ + return ( (float) num ) / (-128); +} + +static uint calc_hash(int3 x) +{ + unsigned int seed = 0; + unsigned int GOLDEN_RATIO = 0x9e3779b9; + seed ^= x.s0 + GOLDEN_RATIO + (seed << 6) + (seed >> 2); + seed ^= x.s1 + GOLDEN_RATIO + (seed << 6) + (seed >> 2); + seed ^= x.s2 + GOLDEN_RATIO + (seed << 6) + (seed >> 2); + return seed; +} + + +//TODO: make hashDivisor a power of 2 +//TODO: put it to this .cl file as a constant +static int custom_find(int3 idx, const int hashDivisor, __global const int* hashes, + __global const int4* data) +{ + int hash = calc_hash(idx) % hashDivisor; + int place = hashes[hash]; + // search a place + while (place >= 0) + { + if (all(data[place].s012 == idx)) + break; + else + place = data[place].s3; + } + + return place; +} + + + +static void integrateVolumeUnit( + int x, int y, + __global const char * depthptr, + int depth_step, int depth_offset, + int depth_rows, int depth_cols, + __global struct TsdfVoxel * volumeptr, + const __global char * pixNormsPtr, + int pixNormsStep, int pixNormsOffset, + int pixNormsRows, int pixNormsCols, + const float16 vol2camMatrix, + const float voxelSize, + const int4 volResolution4, + const int4 volStrides4, + const float2 fxy, + const float2 cxy, + const float dfac, + const float truncDist, + const int maxWeight + ) +{ + const int3 volResolution = volResolution4.xyz; + + if(x >= volResolution.x || y >= volResolution.y) + return; + + // coord-independent constants + const int3 volStrides = volStrides4.xyz; + const float2 limits = (float2)(depth_cols-1, depth_rows-1); + + const float4 vol2cam0 = vol2camMatrix.s0123; + const float4 vol2cam1 = vol2camMatrix.s4567; + const float4 vol2cam2 = vol2camMatrix.s89ab; + + const float truncDistInv = 1.f/truncDist; + + // optimization of camSpace transformation (vector addition instead of matmul at each z) + float4 inPt = (float4)(x*voxelSize, y*voxelSize, 0, 1); + float3 basePt = (float3)(dot(vol2cam0, inPt), + dot(vol2cam1, inPt), + dot(vol2cam2, inPt)); + + float3 camSpacePt = basePt; + + // zStep == vol2cam*(float3(x, y, 1)*voxelSize) - basePt; + float3 zStep = ((float3)(vol2cam0.z, vol2cam1.z, vol2cam2.z))*voxelSize; + + int volYidx = x*volStrides.x + y*volStrides.y; + + int startZ, endZ; + if(fabs(zStep.z) > 1e-5f) + { + int baseZ = convert_int(-basePt.z / zStep.z); + if(zStep.z > 0) + { + startZ = baseZ; + endZ = volResolution.z; + } + else + { + startZ = 0; + endZ = baseZ; + } + } + else + { + if(basePt.z > 0) + { + startZ = 0; endZ = volResolution.z; + } + else + { + // z loop shouldn't be performed + startZ = endZ = 0; + } + } + + startZ = max(0, startZ); + endZ = min(volResolution.z, endZ); + + for(int z = startZ; z < endZ; z++) + { + // optimization of the following: + //float3 camSpacePt = vol2cam * ((float3)(x, y, z)*voxelSize); + camSpacePt += zStep; + + if(camSpacePt.z <= 0) + continue; + + float3 camPixVec = camSpacePt / camSpacePt.z; + float2 projected = mad(camPixVec.xy, fxy, cxy); // mad(a,b,c) = a * b + c + + float v; + // bilinearly interpolate depth at projected + if(all(projected >= 0) && all(projected < limits)) + { + float2 ip = floor(projected); + int xi = ip.x, yi = ip.y; + + __global const float* row0 = (__global const float*)(depthptr + depth_offset + + (yi+0)*depth_step); + __global const float* row1 = (__global const float*)(depthptr + depth_offset + + (yi+1)*depth_step); + + float v00 = row0[xi+0]; + float v01 = row0[xi+1]; + float v10 = row1[xi+0]; + float v11 = row1[xi+1]; + float4 vv = (float4)(v00, v01, v10, v11); + + // assume correct depth is positive + if(all(vv > 0)) + { + float2 t = projected - ip; + float2 vf = mix(vv.xz, vv.yw, t.x); + v = mix(vf.s0, vf.s1, t.y); + } + else + continue; + } + else + continue; + + if(v == 0) + continue; + + int2 projInt = convert_int2(projected); + float pixNorm = *(__global const float*)(pixNormsPtr + pixNormsOffset + projInt.y*pixNormsStep + projInt.x*sizeof(float)); + //float pixNorm = length(camPixVec); + + // difference between distances of point and of surface to camera + float sdf = pixNorm*(v*dfac - camSpacePt.z); + // possible alternative is: + // float sdf = length(camSpacePt)*(v*dfac/camSpacePt.z - 1.0); + if(sdf >= -truncDist) + { + float tsdf = fmin(1.0f, sdf * truncDistInv); + int volIdx = volYidx + z*volStrides.z; + + struct TsdfVoxel voxel = volumeptr[volIdx]; + float value = tsdfToFloat(voxel.tsdf); + int weight = voxel.weight; + // update TSDF + value = (value*weight + tsdf) / (weight + 1); + weight = min(weight + 1, maxWeight); + + voxel.tsdf = floatToTsdf(value); + voxel.weight = weight; + volumeptr[volIdx] = voxel; + } + } + +} + + +__kernel void integrateAllVolumeUnits( + // depth + __global const char * depthptr, + int depth_step, int depth_offset, + int depth_rows, int depth_cols, + // hashMap + __global const int* hashes, + __global const int4* data, + // volUnitsData + __global struct TsdfVoxel * allVolumePtr, + int table_step, int table_offset, + int table_rows, int table_cols, + // pixNorms + const __global char * pixNormsPtr, + int pixNormsStep, int pixNormsOffset, + int pixNormsRows, int pixNormsCols, + // isActiveFlags + __global const uchar* isActiveFlagsPtr, + int isActiveFlagsStep, int isActiveFlagsOffset, + int isActiveFlagsRows, int isActiveFlagsCols, + // cam matrices: + const float16 vol2cam, + const float16 camInv, + // scalars: + const float voxelSize, + const int volUnitResolution, + const int4 volStrides4, + const float2 fxy, + const float2 cxy, + const float dfac, + const float truncDist, + const int maxWeight + ) +{ + const int hash_divisor = HASH_DIVISOR; + int i = get_global_id(0); + int j = get_global_id(1); + int row = get_global_id(2); + int3 idx = data[row].xyz; + + const int4 volResolution4 = (int4)(volUnitResolution, + volUnitResolution, + volUnitResolution, + volUnitResolution); + + int isActive = *(__global const uchar*)(isActiveFlagsPtr + isActiveFlagsOffset + row); + + if (isActive) + { + int volCubed = volUnitResolution * volUnitResolution * volUnitResolution; + __global struct TsdfVoxel * volumeptr = (__global struct TsdfVoxel*) + (allVolumePtr + table_offset + row * volCubed); + + // volUnit2cam = world2cam * volUnit2world = + // camPoseInv * volUnitPose = camPoseInv * (volPose + idx * volUnitSize) = + // camPoseInv * (volPose + idx * volUnitResolution * voxelSize) = + // camPoseInv * (volPose + mulIdx) = camPoseInv * volPose + camPoseInv * mulIdx = + // vol2cam + camPoseInv * mulIdx + float3 mulIdx = convert_float3(idx * volUnitResolution) * voxelSize; + float16 volUnit2cam = vol2cam; + volUnit2cam.s37b += (float3)(dot(mulIdx, camInv.s012), + dot(mulIdx, camInv.s456), + dot(mulIdx, camInv.s89a)); + + integrateVolumeUnit( + i, j, + depthptr, + depth_step, depth_offset, + depth_rows, depth_cols, + volumeptr, + pixNormsPtr, + pixNormsStep, pixNormsOffset, + pixNormsRows, pixNormsCols, + volUnit2cam, + voxelSize, + volResolution4, + volStrides4, + fxy, + cxy, + dfac, + truncDist, + maxWeight + ); + } +} + + +static struct TsdfVoxel at(int3 volumeIdx, int row, int volumeUnitDegree, + int3 volStrides, __global const struct TsdfVoxel * allVolumePtr, int table_offset) + +{ + //! Out of bounds + if (any(volumeIdx >= (1 << volumeUnitDegree)) || + any(volumeIdx < 0)) + { + struct TsdfVoxel dummy; + dummy.tsdf = floatToTsdf(1.0f); + dummy.weight = 0; + return dummy; + } + + int volCubed = 1 << (volumeUnitDegree*3); + __global struct TsdfVoxel * volData = (__global struct TsdfVoxel*) + (allVolumePtr + table_offset + row * volCubed); + int3 ismul = volumeIdx * volStrides; + int coordBase = ismul.x + ismul.y + ismul.z; + return volData[coordBase]; +} + + +static struct TsdfVoxel atVolumeUnit(int3 volumeIdx, int3 volumeUnitIdx, int row, + int volumeUnitDegree, int3 volStrides, + __global const struct TsdfVoxel * allVolumePtr, int table_offset) + +{ + //! Out of bounds + if (row < 0) + { + struct TsdfVoxel dummy; + dummy.tsdf = floatToTsdf(1.0f); + dummy.weight = 0; + return dummy; + } + + int3 volUnitLocalIdx = volumeIdx - (volumeUnitIdx << volumeUnitDegree); + int volCubed = 1 << (volumeUnitDegree*3); + __global struct TsdfVoxel * volData = (__global struct TsdfVoxel*) + (allVolumePtr + table_offset + row * volCubed); + int3 ismul = volUnitLocalIdx * volStrides; + int coordBase = ismul.x + ismul.y + ismul.z; + return volData[coordBase]; +} + +inline float interpolate(float3 t, float8 vz) +{ + float4 vy = mix(vz.s0246, vz.s1357, t.z); + float2 vx = mix(vy.s02, vy.s13, t.y); + return mix(vx.s0, vx.s1, t.x); +} + +inline float3 getNormalVoxel(float3 ptVox, __global const struct TsdfVoxel* allVolumePtr, + int volumeUnitDegree, + const int hash_divisor, + __global const int* hashes, + __global const int4* data, + + int3 volStrides, int table_offset) +{ + float3 normal = (float3) (0.0f, 0.0f, 0.0f); + float3 fip = floor(ptVox); + int3 iptVox = convert_int3(fip); + + // A small hash table to reduce a number of findRow() calls + // -2 and lower means not queried yet + // -1 means not found + // 0+ means found + int iterMap[8]; + for (int i = 0; i < 8; i++) + { + iterMap[i] = -2; + } + +#if !USE_INTERPOLATION_IN_GETNORMAL + int4 offsets[] = { (int4)( 1, 0, 0, 0), (int4)(-1, 0, 0, 0), (int4)( 0, 1, 0, 0), // 0-3 + (int4)( 0, -1, 0, 0), (int4)( 0, 0, 1, 0), (int4)( 0, 0, -1, 0) // 4-7 + }; + + const int nVals = 6; + float vals[6]; +#else + int4 offsets[]={(int4)( 0, 0, 0, 0), (int4)( 0, 0, 1, 0), (int4)( 0, 1, 0, 0), (int4)( 0, 1, 1, 0), // 0-3 + (int4)( 1, 0, 0, 0), (int4)( 1, 0, 1, 0), (int4)( 1, 1, 0, 0), (int4)( 1, 1, 1, 0), // 4-7 + (int4)(-1, 0, 0, 0), (int4)(-1, 0, 1, 0), (int4)(-1, 1, 0, 0), (int4)(-1, 1, 1, 0), // 8-11 + (int4)( 2, 0, 0, 0), (int4)( 2, 0, 1, 0), (int4)( 2, 1, 0, 0), (int4)( 2, 1, 1, 0), // 12-15 + (int4)( 0, -1, 0, 0), (int4)( 0, -1, 1, 0), (int4)( 1, -1, 0, 0), (int4)( 1, -1, 1, 0), // 16-19 + (int4)( 0, 2, 0, 0), (int4)( 0, 2, 1, 0), (int4)( 1, 2, 0, 0), (int4)( 1, 2, 1, 0), // 20-23 + (int4)( 0, 0, -1, 0), (int4)( 0, 1, -1, 0), (int4)( 1, 0, -1, 0), (int4)( 1, 1, -1, 0), // 24-27 + (int4)( 0, 0, 2, 0), (int4)( 0, 1, 2, 0), (int4)( 1, 0, 2, 0), (int4)( 1, 1, 2, 0), // 28-31 + }; + const int nVals = 32; + float vals[32]; +#endif + + for (int i = 0; i < nVals; i++) + { + int3 pt = iptVox + offsets[i].s012; + + // VoxelToVolumeUnitIdx() + int3 volumeUnitIdx = pt >> volumeUnitDegree; + + int3 vand = (volumeUnitIdx & 1); + int dictIdx = vand.s0 + vand.s1 * 2 + vand.s2 * 4; + + int it = iterMap[dictIdx]; + if (it < -1) + { + it = custom_find(volumeUnitIdx, hash_divisor, hashes, data); + iterMap[dictIdx] = it; + } + + struct TsdfVoxel tmp = atVolumeUnit(pt, volumeUnitIdx, it, volumeUnitDegree, volStrides, allVolumePtr, table_offset); + vals[i] = tsdfToFloat( tmp.tsdf ); + } + +#if !USE_INTERPOLATION_IN_GETNORMAL + float3 pv, nv; + + pv = (float3)(vals[0*2 ], vals[1*2 ], vals[2*2 ]); + nv = (float3)(vals[0*2+1], vals[1*2+1], vals[2*2+1]); + normal = pv - nv; +#else + + float cxv[8], cyv[8], czv[8]; + + // How these numbers were obtained: + // 1. Take the basic interpolation sequence: + // 000, 001, 010, 011, 100, 101, 110, 111 + // where each digit corresponds to shift by x, y, z axis respectively. + // 2. Add +1 for next or -1 for prev to each coordinate to corresponding axis + // 3. Search corresponding values in offsets + const int idxxn[8] = { 8, 9, 10, 11, 0, 1, 2, 3 }; + const int idxxp[8] = { 4, 5, 6, 7, 12, 13, 14, 15 }; + const int idxyn[8] = { 16, 17, 0, 1, 18, 19, 4, 5 }; + const int idxyp[8] = { 2, 3, 20, 21, 6, 7, 22, 23 }; + const int idxzn[8] = { 24, 0, 25, 2, 26, 4, 27, 6 }; + const int idxzp[8] = { 1, 28, 3, 29, 5, 30, 7, 31 }; + + float vcxp[8], vcxn[8]; + float vcyp[8], vcyn[8]; + float vczp[8], vczn[8]; + + for (int i = 0; i < 8; i++) + { + vcxp[i] = vals[idxxp[i]]; vcxn[i] = vals[idxxn[i]]; + vcyp[i] = vals[idxyp[i]]; vcyn[i] = vals[idxyn[i]]; + vczp[i] = vals[idxzp[i]]; vczn[i] = vals[idxzn[i]]; + } + + float8 cxp = vload8(0, vcxp), cxn = vload8(0, vcxn); + float8 cyp = vload8(0, vcyp), cyn = vload8(0, vcyn); + float8 czp = vload8(0, vczp), czn = vload8(0, vczn); + float8 cx = cxp - cxn; + float8 cy = cyp - cyn; + float8 cz = czp - czn; + + float3 tv = ptVox - fip; + normal.x = interpolate(tv, cx); + normal.y = interpolate(tv, cy); + normal.z = interpolate(tv, cz); +#endif + + float norm = sqrt(dot(normal, normal)); + return norm < 0.0001f ? nan((uint)0) : normal / norm; +} + +typedef float4 ptype; + +__kernel void raycast( + __global const int* hashes, + __global const int4* data, + __global char * pointsptr, + int points_step, int points_offset, + __global char * normalsptr, + int normals_step, int normals_offset, + const int2 frameSize, + __global const struct TsdfVoxel * allVolumePtr, + int table_step, int table_offset, + int table_rows, int table_cols, + float16 cam2volRotGPU, + float16 vol2camRotGPU, + float truncateThreshold, + const float2 fixy, const float2 cxy, + const float4 boxDown4, const float4 boxUp4, + const float tstep, + const float voxelSize, + const float voxelSizeInv, + float volumeUnitSize, + float truncDist, + int volumeUnitDegree, + int4 volStrides4 + ) +{ + const int hash_divisor = HASH_DIVISOR; + int x = get_global_id(0); + int y = get_global_id(1); + + if(x >= frameSize.x || y >= frameSize.y) + return; + + float3 point = nan((uint)0); + float3 normal = nan((uint)0); + + const float3 camRot0 = cam2volRotGPU.s012; + const float3 camRot1 = cam2volRotGPU.s456; + const float3 camRot2 = cam2volRotGPU.s89a; + const float3 camTrans = cam2volRotGPU.s37b; + + const float3 volRot0 = vol2camRotGPU.s012; + const float3 volRot1 = vol2camRotGPU.s456; + const float3 volRot2 = vol2camRotGPU.s89a; + const float3 volTrans = vol2camRotGPU.s37b; + + float3 planed = (float3)(((float2)(x, y) - cxy)*fixy, 1.f); + planed = (float3)(dot(planed, camRot0), + dot(planed, camRot1), + dot(planed, camRot2)); + + float3 orig = (float3) (camTrans.s0, camTrans.s1, camTrans.s2); + float3 dir = fast_normalize(planed); + float3 origScaled = orig * voxelSizeInv; + float3 dirScaled = dir * voxelSizeInv; + + float tmin = 0; + float tmax = truncateThreshold; + float tcurr = tmin; + float tprev = tcurr; + float prevTsdf = truncDist; + + int3 volStrides = volStrides4.xyz; + + while (tcurr < tmax) + { + float3 currRayPosVox = origScaled + tcurr * dirScaled; + + // VolumeToVolumeUnitIdx() + int3 currVoxel = convert_int3(floor(currRayPosVox)); + int3 currVolumeUnitIdx = currVoxel >> volumeUnitDegree; + + int row = custom_find(currVolumeUnitIdx, hash_divisor, hashes, data); + + float currTsdf = prevTsdf; + int currWeight = 0; + float stepSize = 0.5 * volumeUnitSize; + int3 volUnitLocalIdx; + + if (row >= 0) + { + volUnitLocalIdx = currVoxel - (currVolumeUnitIdx << volumeUnitDegree); + struct TsdfVoxel currVoxel = at(volUnitLocalIdx, row, volumeUnitDegree, volStrides, allVolumePtr, table_offset); + + currTsdf = tsdfToFloat(currVoxel.tsdf); + currWeight = currVoxel.weight; + stepSize = tstep; + } + + if (prevTsdf > 0.f && currTsdf <= 0.f && currWeight > 0) + { + float tInterp = (tcurr * prevTsdf - tprev * currTsdf) / (prevTsdf - currTsdf); + if ( !isnan(tInterp) && !isinf(tInterp) ) + { + float3 pvox = origScaled + tInterp * dirScaled; + float3 nv = getNormalVoxel( pvox, allVolumePtr, volumeUnitDegree, + hash_divisor, hashes, data, + volStrides, table_offset); + + if(!any(isnan(nv))) + { + //convert pv and nv to camera space + normal = (float3)(dot(nv, volRot0), + dot(nv, volRot1), + dot(nv, volRot2)); + // interpolation optimized a little + float3 pv = pvox * voxelSize; + point = (float3)(dot(pv, volRot0), + dot(pv, volRot1), + dot(pv, volRot2)) + volTrans; + } + } + break; + } + prevTsdf = currTsdf; + tprev = tcurr; + tcurr += stepSize; + } + + __global float* pts = (__global float*)(pointsptr + points_offset + y*points_step + x*sizeof(ptype)); + __global float* nrm = (__global float*)(normalsptr + normals_offset + y*normals_step + x*sizeof(ptype)); + vstore4((float4)(point, 0), 0, pts); + vstore4((float4)(normal, 0), 0, nrm); +} + + +__kernel void markActive ( + __global const int4* hashSetData, + + __global char* isActiveFlagsPtr, + int isActiveFlagsStep, int isActiveFlagsOffset, + int isActiveFlagsRows, int isActiveFlagsCols, + + __global char* lastVisibleIndicesPtr, + int lastVisibleIndicesStep, int lastVisibleIndicesOffset, + int lastVisibleIndicesRows, int lastVisibleIndicesCols, + + const float16 vol2cam, + const float2 fxy, + const float2 cxy, + const int2 frameSz, + const float volumeUnitSize, + const int lastVolIndex, + const float truncateThreshold, + const int frameId + ) +{ + const int hash_divisor = HASH_DIVISOR; + int row = get_global_id(0); + + if (row < lastVolIndex) + { + int3 idx = hashSetData[row].xyz; + + float3 volumeUnitPos = convert_float3(idx) * volumeUnitSize; + + float3 volUnitInCamSpace = (float3) (dot(volumeUnitPos, vol2cam.s012), + dot(volumeUnitPos, vol2cam.s456), + dot(volumeUnitPos, vol2cam.s89a)) + vol2cam.s37b; + + if (volUnitInCamSpace.z < 0 || volUnitInCamSpace.z > truncateThreshold) + { + *(isActiveFlagsPtr + isActiveFlagsOffset + row * isActiveFlagsStep) = 0; + return; + } + + float2 cameraPoint; + float invz = 1.f / volUnitInCamSpace.z; + cameraPoint = fxy * volUnitInCamSpace.xy * invz + cxy; + + if (all(cameraPoint >= 0) && all(cameraPoint < convert_float2(frameSz))) + { + *(__global int*)(lastVisibleIndicesPtr + lastVisibleIndicesOffset + row * lastVisibleIndicesStep) = frameId; + *(isActiveFlagsPtr + isActiveFlagsOffset + row * isActiveFlagsStep) = 1; + } + } +} diff --git a/modules/rgbd/src/opencl/tsdf.cl b/modules/rgbd/src/opencl/tsdf.cl index b9186826236..3a13d3b8336 100644 --- a/modules/rgbd/src/opencl/tsdf.cl +++ b/modules/rgbd/src/opencl/tsdf.cl @@ -26,20 +26,6 @@ static inline float tsdfToFloat(TsdfType num) return ( (float) num ) / (-128); } -__kernel void preCalculationPixNorm (__global float * pixNorms, - int pix_step, int pix_offset, - int pix_rows, int pix_cols, - const __global float * xx, - const __global float * yy, - int width, int height) -{ - int i = get_global_id(0); - int j = get_global_id(1); - int idx = i*width + j; - if(i < height && j < width && idx < pix_cols) - pixNorms[idx] = sqrt(xx[j] * xx[j] + yy[i] * yy[i] + 1.0f); -} - __kernel void integrate(__global const char * depthptr, int depth_step, int depth_offset, int depth_rows, int depth_cols, diff --git a/modules/rgbd/src/opencl/tsdf_functions.cl b/modules/rgbd/src/opencl/tsdf_functions.cl new file mode 100644 index 00000000000..40efeca57c8 --- /dev/null +++ b/modules/rgbd/src/opencl/tsdf_functions.cl @@ -0,0 +1,19 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory + +__kernel void preCalculationPixNorm (__global char * pixNormsPtr, + int pixNormsStep, int pixNormsOffset, + int pixNormsRows, int pixNormsCols, + const __global float * xx, + const __global float * yy) +{ + int i = get_global_id(0); + int j = get_global_id(1); + if (i < pixNormsRows && j < pixNormsCols) + { + *(__global float*)(pixNormsPtr + pixNormsOffset + i*pixNormsStep + j*sizeof(float)) = sqrt(xx[j] * xx[j] + yy[i] * yy[i] + 1.0f); + } +} \ No newline at end of file diff --git a/modules/rgbd/src/submap.hpp b/modules/rgbd/src/submap.hpp index 65a2bed6229..b40023b6a56 100644 --- a/modules/rgbd/src/submap.hpp +++ b/modules/rgbd/src/submap.hpp @@ -42,7 +42,7 @@ class Submap Submap(int _id, const VolumeParams& volumeParams, const cv::Affine3f& _pose = cv::Affine3f::Identity(), int _startFrameId = 0) - : id(_id), pose(_pose), cameraPose(Affine3f::Identity()), startFrameId(_startFrameId), volume(volumeParams) + : id(_id), pose(_pose), cameraPose(Affine3f::Identity()), startFrameId(_startFrameId), volume(makeHashTSDFVolume(volumeParams)) { std::cout << "Created volume\n"; } @@ -53,10 +53,10 @@ class Submap OutputArray points, OutputArray normals); virtual void updatePyrPointsNormals(const int pyramidLevels); - virtual int getTotalAllocatedBlocks() const { return int(volume.getTotalVolumeUnits()); }; + virtual int getTotalAllocatedBlocks() const { return int(volume->getTotalVolumeUnits()); }; virtual int getVisibleBlocks(int currFrameId) const { - return volume.getVisibleBlocks(currFrameId, FRAME_VISIBILITY_THRESHOLD); + return volume->getVisibleBlocks(currFrameId, FRAME_VISIBILITY_THRESHOLD); } float calcVisibilityRatio(int currFrameId) const @@ -91,7 +91,7 @@ class Submap //! TODO: Add support for GPU arrays (UMat) std::vector pyrPoints; std::vector pyrNormals; - HashTSDFVolumeCPU volume; + std::shared_ptr volume; }; template @@ -100,14 +100,14 @@ void Submap::integrate(InputArray _depth, float depthFactor, const cv:: const int currFrameId) { CV_Assert(currFrameId >= startFrameId); - volume.integrate(_depth, depthFactor, cameraPose.matrix, intrinsics, currFrameId); + volume->integrate(_depth, depthFactor, cameraPose.matrix, intrinsics, currFrameId); } template void Submap::raycast(const cv::Affine3f& _cameraPose, const cv::kinfu::Intr& intrinsics, cv::Size frameSize, OutputArray points, OutputArray normals) { - volume.raycast(_cameraPose.matrix, intrinsics, frameSize, points, normals); + volume->raycast(_cameraPose.matrix, intrinsics, frameSize, points, normals); } template diff --git a/modules/rgbd/src/tsdf.cpp b/modules/rgbd/src/tsdf.cpp index 412c2fe789e..0a0fcfacf05 100644 --- a/modules/rgbd/src/tsdf.cpp +++ b/modules/rgbd/src/tsdf.cpp @@ -102,10 +102,7 @@ TsdfVoxel TSDFVolumeCPU::at(const Vec3i& volumeIdx) const (volumeIdx[1] >= volResolution.y || volumeIdx[1] < 0) || (volumeIdx[2] >= volResolution.z || volumeIdx[2] < 0)) { - TsdfVoxel dummy; - dummy.tsdf = floatToTsdf(1.0f); - dummy.weight = 0; - return dummy; + return TsdfVoxel(floatToTsdf(1.f), 0); } const TsdfVoxel* volData = volume.ptr(); @@ -836,47 +833,6 @@ void TSDFVolumeGPU::reset() volume.setTo(Scalar(0, 0)); } -static void preCalculationPixNormGPU(int depth_rows, int depth_cols, Vec2f fxy, Vec2f cxy, UMat& pixNorm) -{ - Mat x(1, depth_cols, CV_32F); - Mat y(1, depth_rows, CV_32F); - pixNorm.create(1, depth_rows * depth_cols, CV_32F); - - for (int i = 0; i < depth_cols; i++) - x.at(0, i) = (i - cxy[0]) / fxy[0]; - for (int i = 0; i < depth_rows; i++) - y.at(0, i) = (i - cxy[1]) / fxy[1]; - - cv::String errorStr; - cv::String name = "preCalculationPixNorm"; - ocl::ProgramSource source = ocl::rgbd::tsdf_oclsrc; - cv::String options = "-cl-mad-enable"; - ocl::Kernel kk; - kk.create(name.c_str(), source, options, &errorStr); - - - if (kk.empty()) - throw std::runtime_error("Failed to create kernel: " + errorStr); - - AccessFlag af = ACCESS_READ; - UMat xx = x.getUMat(af); - UMat yy = y.getUMat(af); - - kk.args(ocl::KernelArg::ReadWrite(pixNorm), - ocl::KernelArg::PtrReadOnly(xx), - ocl::KernelArg::PtrReadOnly(yy), - depth_cols, depth_rows); - - size_t globalSize[2]; - globalSize[0] = depth_rows; - globalSize[1] = depth_cols; - - if (!kk.run(2, globalSize, NULL, true)) - throw std::runtime_error("Failed to run kernel"); - - return; -} - // use depth instead of distance (optimization) void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const Intr& intrinsics, const int frameId) @@ -901,15 +857,13 @@ void TSDFVolumeGPU::integrate(InputArray _depth, float depthFactor, float dfac = 1.f/depthFactor; Vec4i volResGpu(volResolution.x, volResolution.y, volResolution.z); Vec2f fxy(intrinsics.fx, intrinsics.fy), cxy(intrinsics.cx, intrinsics.cy); - if (!(frameParams[0] == depth.rows && frameParams[1] == depth.cols && - frameParams[2] == intrinsics.fx && frameParams[3] == intrinsics.fy && - frameParams[4] == intrinsics.cx && frameParams[5] == intrinsics.cy)) + Vec6f newParams((float)depth.rows, (float)depth.cols, + intrinsics.fx, intrinsics.fy, + intrinsics.cx, intrinsics.cy); + if (!(frameParams == newParams)) { - frameParams[0] = (float)depth.rows; frameParams[1] = (float)depth.cols; - frameParams[2] = intrinsics.fx; frameParams[3] = intrinsics.fy; - frameParams[4] = intrinsics.cx; frameParams[5] = intrinsics.cy; - - preCalculationPixNormGPU(depth.rows, depth.cols, fxy, cxy, pixNorms); + frameParams = newParams; + pixNorms = preCalculationPixNormGPU(depth, intrinsics); } // TODO: optimization possible diff --git a/modules/rgbd/src/tsdf.hpp b/modules/rgbd/src/tsdf.hpp index 009c4a8466c..67361aa59d9 100644 --- a/modules/rgbd/src/tsdf.hpp +++ b/modules/rgbd/src/tsdf.hpp @@ -17,15 +17,15 @@ namespace cv { namespace kinfu { -// TODO: Optimization possible: -// * TsdfType can be FP16 -// * WeightType can be uint16 typedef int8_t TsdfType; typedef uchar WeightType; struct TsdfVoxel { + TsdfVoxel(TsdfType _tsdf, WeightType _weight) : + tsdf(_tsdf), weight(_weight) + { } TsdfType tsdf; WeightType weight; }; @@ -107,8 +107,7 @@ class TSDFVolumeGPU : public TSDFVolume UMat pixNorms; // See zFirstMemOrder arg of parent class constructor // for the array layout info - // Array elem is CV_32FC2, read as (float, int) - // TODO: optimization possible to (fp16, int16), see Voxel definition + // Array elem is CV_8UC2, read as (int8, uint8) UMat volume; }; #endif diff --git a/modules/rgbd/src/tsdf_functions.cpp b/modules/rgbd/src/tsdf_functions.cpp index ff40dce248d..b0e5276cba7 100644 --- a/modules/rgbd/src/tsdf_functions.cpp +++ b/modules/rgbd/src/tsdf_functions.cpp @@ -6,6 +6,7 @@ #include "precomp.hpp" #include "tsdf_functions.hpp" +#include "opencl_kernels_rgbd.hpp" namespace cv { @@ -35,6 +36,51 @@ cv::Mat preCalculationPixNorm(Depth depth, const Intr& intrinsics) return pixNorm; } +#ifdef HAVE_OPENCL +cv::UMat preCalculationPixNormGPU(const UMat& depth, const Intr& intrinsics) +{ + int depth_cols = depth.cols; + int depth_rows = depth.rows; + Point2f fl(intrinsics.fx, intrinsics.fy); + Point2f pp(intrinsics.cx, intrinsics.cy); + Mat x(1, depth_cols, CV_32FC1); + Mat y(1, depth_rows, CV_32FC1); + UMat pixNorm(depth_rows, depth_cols, CV_32F); + + for (int i = 0; i < depth_cols; i++) + x.at(i) = (i - pp.x) / fl.x; + for (int i = 0; i < depth_rows; i++) + y.at(i) = (i - pp.y) / fl.y; + + cv::String errorStr; + cv::String name = "preCalculationPixNorm"; + ocl::ProgramSource source = ocl::rgbd::tsdf_functions_oclsrc; + cv::String options = "-cl-mad-enable"; + ocl::Kernel kk; + kk.create(name.c_str(), source, options, &errorStr); + + if (kk.empty()) + throw std::runtime_error("Failed to create kernel: " + errorStr); + + AccessFlag af = ACCESS_READ; + UMat xx = x.getUMat(af); + UMat yy = y.getUMat(af); + + kk.args(ocl::KernelArg::WriteOnly(pixNorm), + ocl::KernelArg::PtrReadOnly(xx), + ocl::KernelArg::PtrReadOnly(yy)); + + size_t globalSize[2]; + globalSize[0] = depth_rows; + globalSize[1] = depth_cols; + + if (!kk.run(2, globalSize, NULL, true)) + throw std::runtime_error("Failed to run kernel"); + + return pixNorm; +} +#endif + const bool fixMissingData = false; depthType bilinearDepth(const Depth& m, cv::Point2f pt) { @@ -254,7 +300,7 @@ void integrateVolumeUnit( if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows)) continue; float pixNorm = pixNorms.at(_v, _u); - // float pixNorm = sqrt(v_reduce_sum(camPixVec*camPixVec)); + //float pixNorm = sqrt(v_reduce_sum(camPixVec*camPixVec)); // difference between distances of point and of surface to camera float sdf = pixNorm * (v * dfac - zCamSpace); // possible alternative is: diff --git a/modules/rgbd/src/tsdf_functions.hpp b/modules/rgbd/src/tsdf_functions.hpp index 6d86595118f..09efecf1252 100644 --- a/modules/rgbd/src/tsdf_functions.hpp +++ b/modules/rgbd/src/tsdf_functions.hpp @@ -35,6 +35,8 @@ inline float tsdfToFloat(TsdfType num) } cv::Mat preCalculationPixNorm(Depth depth, const Intr& intrinsics); +cv::UMat preCalculationPixNormGPU(const UMat& depth, const Intr& intrinsics); + depthType bilinearDepth(const Depth& m, cv::Point2f pt); void integrateVolumeUnit( @@ -43,6 +45,246 @@ void integrateVolumeUnit( InputArray _depth, float depthFactor, const cv::Matx44f& cameraPose, const cv::kinfu::Intr& intrinsics, InputArray _pixNorms, InputArray _volume); + +class CustomHashSet +{ +public: + static const int hashDivisor = 32768; + static const int startCapacity = 2048; + + std::vector hashes; + // 0-3 for key, 4th for internal use + // don't keep keep value + std::vector data; + int capacity; + int last; + + CustomHashSet() + { + hashes.resize(hashDivisor); + for (int i = 0; i < hashDivisor; i++) + hashes[i] = -1; + capacity = startCapacity; + + data.resize(capacity); + for (int i = 0; i < capacity; i++) + data[i] = { 0, 0, 0, -1 }; + + last = 0; + } + + ~CustomHashSet() { } + + inline size_t calc_hash(Vec3i x) const + { + uint32_t seed = 0; + constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9; + for (int i = 0; i < 3; i++) + { + seed ^= x[i] + GOLDEN_RATIO + (seed << 6) + (seed >> 2); + } + return seed; + } + + // should work on existing elements too + // 0 - need resize + // 1 - idx is inserted + // 2 - idx already exists + int insert(Vec3i idx) + { + if (last < capacity) + { + int hash = int(calc_hash(idx) % hashDivisor); + int place = hashes[hash]; + if (place >= 0) + { + int oldPlace = place; + while (place >= 0) + { + if (data[place][0] == idx[0] && + data[place][1] == idx[1] && + data[place][2] == idx[2]) + return 2; + else + { + oldPlace = place; + place = data[place][3]; + //std::cout << "place=" << place << std::endl; + } + } + + // found, create here + data[oldPlace][3] = last; + } + else + { + // insert at last + hashes[hash] = last; + } + + data[last][0] = idx[0]; + data[last][1] = idx[1]; + data[last][2] = idx[2]; + data[last][3] = -1; + last++; + + return 1; + } + else + return 0; + } + + int find(Vec3i idx) const + { + int hash = int(calc_hash(idx) % hashDivisor); + int place = hashes[hash]; + // search a place + while (place >= 0) + { + if (data[place][0] == idx[0] && + data[place][1] == idx[1] && + data[place][2] == idx[2]) + break; + else + { + place = data[place][3]; + } + } + + return place; + } +}; + +// TODO: remove this structure as soon as HashTSDFGPU data is completely on GPU; +// until then CustomHashTable can be replaced by this one if needed + +const int NAN_ELEMENT = -2147483647; + +struct Volume_NODE +{ + Vec4i idx = Vec4i(NAN_ELEMENT); + int32_t row = -1; + int32_t nextVolumeRow = -1; + int32_t dummy = 0; + int32_t dummy2 = 0; +}; + +const int _hash_divisor = 32768; +const int _list_size = 4; + +class VolumesTable +{ +public: + const int hash_divisor = _hash_divisor; + const int list_size = _list_size; + const int32_t free_row = -1; + const int32_t free_isActive = 0; + + const cv::Vec4i nan4 = cv::Vec4i(NAN_ELEMENT); + + int bufferNums; + cv::Mat volumes; + + VolumesTable() : bufferNums(1) + { + this->volumes = cv::Mat(hash_divisor * list_size, 1, rawType()); + for (int i = 0; i < volumes.size().height; i++) + { + Volume_NODE* v = volumes.ptr(i); + v->idx = nan4; + v->row = -1; + v->nextVolumeRow = -1; + } + } + const VolumesTable& operator=(const VolumesTable& vt) + { + this->volumes = vt.volumes; + this->bufferNums = vt.bufferNums; + return *this; + } + ~VolumesTable() {}; + + bool insert(Vec3i idx, int row) + { + CV_Assert(row >= 0); + + int bufferNum = 0; + int hash = int(calc_hash(idx) % hash_divisor); + int start = getPos(idx, bufferNum); + int i = start; + + while (i >= 0) + { + Volume_NODE* v = volumes.ptr(i); + + if (v->idx[0] == NAN_ELEMENT) + { + Vec4i idx4(idx[0], idx[1], idx[2], 0); + + bool extend = false; + if (i != start && i % list_size == 0) + { + if (bufferNum >= bufferNums - 1) + { + extend = true; + volumes.resize(hash_divisor * bufferNums); + bufferNums++; + } + bufferNum++; + v->nextVolumeRow = (bufferNum * hash_divisor + hash) * list_size; + } + else + { + v->nextVolumeRow = i + 1; + } + + v->idx = idx4; + v->row = row; + + return extend; + } + + i = v->nextVolumeRow; + } + return false; + } + int findRow(Vec3i idx) const + { + int bufferNum = 0; + int i = getPos(idx, bufferNum); + + while (i >= 0) + { + const Volume_NODE* v = volumes.ptr(i); + + if (v->idx == Vec4i(idx[0], idx[1], idx[2], 0)) + return v->row; + else + i = v->nextVolumeRow; + } + + return -1; + } + + inline int getPos(Vec3i idx, int bufferNum) const + { + int hash = int(calc_hash(idx) % hash_divisor); + return (bufferNum * hash_divisor + hash) * list_size; + } + + inline size_t calc_hash(Vec3i x) const + { + uint32_t seed = 0; + constexpr uint32_t GOLDEN_RATIO = 0x9e3779b9; + for (int i = 0; i < 3; i++) + { + seed ^= x[i] + GOLDEN_RATIO + (seed << 6) + (seed >> 2); + } + return seed; + } +}; + + } // namespace kinfu } // namespace cv #endif diff --git a/modules/rgbd/src/volume.cpp b/modules/rgbd/src/volume.cpp index 8177213e8ab..88c46fe4b67 100644 --- a/modules/rgbd/src/volume.cpp +++ b/modules/rgbd/src/volume.cpp @@ -68,7 +68,7 @@ Ptr makeVolume(const VolumeParams& _volumeParams) if(_volumeParams.type == VolumeType::TSDF) return kinfu::makeTSDFVolume(_volumeParams); else if(_volumeParams.type == VolumeType::HASHTSDF) - return kinfu::makeHashTSDFVolume(_volumeParams); + return kinfu::makeHashTSDFVolume(_volumeParams); CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters"); } @@ -79,13 +79,11 @@ Ptr makeVolume(VolumeType _volumeType, float _voxelSize, Matx44f _pose, Point3i _presolution = _resolution; if (_volumeType == VolumeType::TSDF) { - return makeTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, - _presolution); + return makeTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _presolution); } else if (_volumeType == VolumeType::HASHTSDF) { - return makeHashTSDFVolume( - _voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold); + return makeHashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold); } CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters"); } diff --git a/modules/rgbd/test/test_tsdf.cpp b/modules/rgbd/test/test_tsdf.cpp index 99da9579697..5a9b23afb95 100644 --- a/modules/rgbd/test/test_tsdf.cpp +++ b/modules/rgbd/test/test_tsdf.cpp @@ -358,7 +358,7 @@ void normal_test(bool isHashTSDF, bool isRaycast, bool isFetchPointsNormals, boo points = _points.getMat(af); renderPointsNormals(points, normals, image, _params->lightPose); imshow("render", image); - waitKey(20000); + waitKey(2000); } if (isRaycast) @@ -384,7 +384,7 @@ void normal_test(bool isHashTSDF, bool isRaycast, bool isFetchPointsNormals, boo points = _newPoints.getMat(af); renderPointsNormals(points, normals, image, _params->lightPose); imshow("render", image); - waitKey(20000); + waitKey(2000); } } @@ -448,7 +448,7 @@ void valid_points_test(bool isHashTSDF) imshow("depth", depth * (1.f / _params->depthFactor / 4.f)); renderPointsNormals(points, normals, image, _params->lightPose); imshow("render", image); - waitKey(20000); + waitKey(2000); } volume->raycast(poses[17].matrix, _params->intr, _params->frameSize, _newPoints, _newNormals); @@ -463,10 +463,13 @@ void valid_points_test(bool isHashTSDF) imshow("depth", depth * (1.f / _params->depthFactor / 4.f)); renderPointsNormals(points, normals, image, _params->lightPose); imshow("render", image); - waitKey(20000); + waitKey(2000); } - float percentValidity = float(profile) / float(anfas); + float percentValidity; + if (profile == 0) percentValidity = -0.5; + else if (anfas == 0) percentValidity = 0; + else percentValidity = float(profile) / float(anfas); ASSERT_LT(0.5 - percentValidity, 0.3); } @@ -510,4 +513,5 @@ TEST(HashTSDF, valid_points) valid_points_test(true); } + }} // namespace