|
| 1 | +// Copyright (c) OpenMMLab. All rights reserved. |
| 2 | +#include "rtmdet_head.h" |
| 3 | + |
| 4 | +#include <math.h> |
| 5 | + |
| 6 | +#include <algorithm> |
| 7 | +#include <numeric> |
| 8 | + |
| 9 | +#include "mmdeploy/core/model.h" |
| 10 | +#include "mmdeploy/core/utils/device_utils.h" |
| 11 | +#include "mmdeploy/core/utils/formatter.h" |
| 12 | +#include "utils.h" |
| 13 | + |
| 14 | +namespace mmdeploy::mmdet { |
| 15 | + |
| 16 | +RTMDetSepBNHead::RTMDetSepBNHead(const Value& cfg) : MMDetection(cfg) { |
| 17 | + auto init = [&]() -> Result<void> { |
| 18 | + auto model = cfg["context"]["model"].get<Model>(); |
| 19 | + if (cfg.contains("params")) { |
| 20 | + nms_pre_ = cfg["params"].value("nms_pre", -1); |
| 21 | + score_thr_ = cfg["params"].value("score_thr", 0.02f); |
| 22 | + min_bbox_size_ = cfg["params"].value("min_bbox_size", 0); |
| 23 | + max_per_img_ = cfg["params"].value("max_per_img", 100); |
| 24 | + iou_threshold_ = cfg["params"].contains("nms") |
| 25 | + ? cfg["params"]["nms"].value("iou_threshold", 0.45f) |
| 26 | + : 0.45f; |
| 27 | + if (cfg["params"].contains("anchor_generator")) { |
| 28 | + offset_ = cfg["params"]["anchor_generator"].value("offset", 0); |
| 29 | + from_value(cfg["params"]["anchor_generator"]["strides"], strides_); |
| 30 | + } |
| 31 | + } |
| 32 | + return success(); |
| 33 | + }; |
| 34 | + init().value(); |
| 35 | +} |
| 36 | + |
| 37 | +Result<Value> RTMDetSepBNHead::operator()(const Value& prep_res, const Value& infer_res) { |
| 38 | + MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res); |
| 39 | + try { |
| 40 | + std::vector<Tensor> cls_scores; |
| 41 | + std::vector<Tensor> bbox_preds; |
| 42 | + const Device kHost{0, 0}; |
| 43 | + int i = 0; |
| 44 | + int divisor = infer_res.size() / 2; |
| 45 | + for (auto iter = infer_res.begin(); iter != infer_res.end(); iter++) { |
| 46 | + auto pred_map = iter->get<Tensor>(); |
| 47 | + OUTCOME_TRY(auto _pred_map, MakeAvailableOnDevice(pred_map, kHost, stream())); |
| 48 | + if (i < divisor) |
| 49 | + cls_scores.push_back(_pred_map); |
| 50 | + else |
| 51 | + bbox_preds.push_back(_pred_map); |
| 52 | + i++; |
| 53 | + } |
| 54 | + OUTCOME_TRY(stream().Wait()); |
| 55 | + OUTCOME_TRY(auto result, GetBBoxes(prep_res["img_metas"], bbox_preds, cls_scores)); |
| 56 | + return to_value(result); |
| 57 | + } catch (...) { |
| 58 | + return Status(eFail); |
| 59 | + } |
| 60 | +} |
| 61 | + |
/// Logistic function 1 / (1 + e^-x). The sum and division are carried out in
/// double precision and the result is narrowed to float on return.
static float sigmoid(float x) {
  const double denominator = 1.0 + expf(-x);
  return static_cast<float>(1.0 / denominator);
}
| 63 | + |
| 64 | +Result<Detections> RTMDetSepBNHead::GetBBoxes(const Value& prep_res, |
| 65 | + const std::vector<Tensor>& bbox_preds, |
| 66 | + const std::vector<Tensor>& cls_scores) const { |
| 67 | + MMDEPLOY_DEBUG("bbox_pred: {}, {}", bbox_preds[0].shape(), dets[0].data_type()); |
| 68 | + MMDEPLOY_DEBUG("cls_score: {}, {}", scores[0].shape(), scores[0].data_type()); |
| 69 | + |
| 70 | + std::vector<float> filter_boxes; |
| 71 | + std::vector<float> obj_probs; |
| 72 | + std::vector<int> class_ids; |
| 73 | + |
| 74 | + for (int i = 0; i < bbox_preds.size(); i++) { |
| 75 | + RTMDetFeatDeocde(bbox_preds[i], cls_scores[i], strides_[i], offset_, filter_boxes, obj_probs, |
| 76 | + class_ids); |
| 77 | + } |
| 78 | + |
| 79 | + std::vector<int> indexArray; |
| 80 | + for (int i = 0; i < obj_probs.size(); ++i) { |
| 81 | + indexArray.push_back(i); |
| 82 | + } |
| 83 | + Sort(obj_probs, class_ids, indexArray); |
| 84 | + |
| 85 | + Tensor dets(TensorDesc{Device{0, 0}, DataType::kFLOAT, |
| 86 | + TensorShape{int(filter_boxes.size() / 4), 4}, "dets"}); |
| 87 | + std::copy(filter_boxes.begin(), filter_boxes.end(), dets.data<float>()); |
| 88 | + NMS(dets, iou_threshold_, indexArray); |
| 89 | + |
| 90 | + Detections objs; |
| 91 | + std::vector<float> scale_factor; |
| 92 | + if (prep_res.contains("scale_factor")) { |
| 93 | + from_value(prep_res["scale_factor"], scale_factor); |
| 94 | + } else { |
| 95 | + scale_factor = {1.f, 1.f, 1.f, 1.f}; |
| 96 | + } |
| 97 | + int ori_width = prep_res["ori_shape"][2].get<int>(); |
| 98 | + int ori_height = prep_res["ori_shape"][1].get<int>(); |
| 99 | + auto det_ptr = dets.data<float>(); |
| 100 | + for (int i = 0; i < indexArray.size(); ++i) { |
| 101 | + if (indexArray[i] == -1) { |
| 102 | + continue; |
| 103 | + } |
| 104 | + int j = indexArray[i]; |
| 105 | + auto x1 = det_ptr[j * 4 + 0]; |
| 106 | + auto y1 = det_ptr[j * 4 + 1]; |
| 107 | + auto x2 = det_ptr[j * 4 + 2]; |
| 108 | + auto y2 = det_ptr[j * 4 + 3]; |
| 109 | + int label_id = class_ids[i]; |
| 110 | + float score = obj_probs[i]; |
| 111 | + |
| 112 | + MMDEPLOY_DEBUG("{}-th box: ({}, {}, {}, {}), {}, {}", i, x1, y1, x2, y2, label_id, score); |
| 113 | + |
| 114 | + auto rect = |
| 115 | + MapToOriginImage(x1, y1, x2, y2, scale_factor.data(), 0, 0, ori_width, ori_height, 0, 0); |
| 116 | + if (rect[2] - rect[0] < min_bbox_size_ || rect[3] - rect[1] < min_bbox_size_) { |
| 117 | + MMDEPLOY_DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0], |
| 118 | + rect[3] - rect[1]); |
| 119 | + continue; |
| 120 | + } |
| 121 | + Detection det{}; |
| 122 | + det.index = i; |
| 123 | + det.label_id = label_id; |
| 124 | + det.score = score; |
| 125 | + det.bbox = rect; |
| 126 | + objs.push_back(std::move(det)); |
| 127 | + } |
| 128 | + |
| 129 | + return objs; |
| 130 | +} |
| 131 | + |
| 132 | +int RTMDetSepBNHead::RTMDetFeatDeocde(const Tensor& bbox_pred, const Tensor& cls_score, |
| 133 | + const float stride, const float offset, |
| 134 | + std::vector<float>& filter_boxes, |
| 135 | + std::vector<float>& obj_probs, |
| 136 | + std::vector<int>& class_ids) const { |
| 137 | + int cls_param_num = cls_score.shape(1); |
| 138 | + int feat_h = bbox_pred.shape(2); |
| 139 | + int feat_w = bbox_pred.shape(3); |
| 140 | + int feat_size = feat_h * feat_w; |
| 141 | + auto bbox_ptr = bbox_pred.data<float>(); |
| 142 | + auto score_ptr = cls_score.data<float>(); // (b, c, h, w) |
| 143 | + int valid_count = 0; |
| 144 | + for (int i = 0; i < feat_h; i++) { |
| 145 | + for (int j = 0; j < feat_w; j++) { |
| 146 | + float max_score = score_ptr[i * feat_w + j]; |
| 147 | + int class_id = 0; |
| 148 | + for (int k = 0; k < cls_param_num; k++) { |
| 149 | + float score = score_ptr[k * feat_size + i * feat_w + j]; |
| 150 | + if (score > max_score) { |
| 151 | + max_score = score; |
| 152 | + class_id = k; |
| 153 | + } |
| 154 | + } |
| 155 | + max_score = sigmoid(max_score); |
| 156 | + if (max_score < score_thr_) continue; |
| 157 | + |
| 158 | + obj_probs.push_back(max_score); |
| 159 | + class_ids.push_back(class_id); |
| 160 | + |
| 161 | + float tl_x = bbox_ptr[0 * feat_size + i * feat_w + j]; |
| 162 | + float tl_y = bbox_ptr[1 * feat_size + i * feat_w + j]; |
| 163 | + float br_x = bbox_ptr[2 * feat_size + i * feat_w + j]; |
| 164 | + float br_y = bbox_ptr[3 * feat_size + i * feat_w + j]; |
| 165 | + |
| 166 | + auto box = RTMDetdecode(tl_x, tl_y, br_x, br_y, stride, offset, j, i); |
| 167 | + |
| 168 | + tl_x = box[0]; |
| 169 | + tl_y = box[1]; |
| 170 | + br_x = box[2]; |
| 171 | + br_y = box[3]; |
| 172 | + |
| 173 | + filter_boxes.push_back(tl_x); |
| 174 | + filter_boxes.push_back(tl_y); |
| 175 | + filter_boxes.push_back(br_x); |
| 176 | + filter_boxes.push_back(br_y); |
| 177 | + valid_count++; |
| 178 | + } |
| 179 | + } |
| 180 | + return valid_count; |
| 181 | +} |
| 182 | + |
| 183 | +std::array<float, 4> RTMDetSepBNHead::RTMDetdecode(float tl_x, float tl_y, float br_x, float br_y, |
| 184 | + float stride, float offset, int j, int i) const { |
| 185 | + tl_x = (offset + j) * stride - tl_x; |
| 186 | + tl_y = (offset + i) * stride - tl_y; |
| 187 | + br_x = (offset + j) * stride + br_x; |
| 188 | + br_y = (offset + i) * stride + br_y; |
| 189 | + return std::array<float, 4>{tl_x, tl_y, br_x, br_y}; |
| 190 | +} |
| 191 | + |
| 192 | +MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, RTMDetSepBNHead); |
| 193 | + |
| 194 | +} // namespace mmdeploy::mmdet |
0 commit comments