Skip to content

Commit d76c7b6

Browse files
[Fix] Fix errors about deploying MMYOLO-OpenVINO, DETR, Conformer and RTMDet (#1919)
* fix reg test yolox * fix detr * fix rtmdet-sdk reg * fix conformer precision * add conformer_cls sdk * add mmcls ut * fix detr ut * fix detr ut * fix lint * fix yapf * fix cls sdk * fix detr_head rewriter * fix interpolate * complement the mmdet ut * fix regression DETR * fix ut * fix ut version * fix lint
1 parent 502692b commit d76c7b6

File tree

11 files changed

+298
-36
lines changed

11 files changed

+298
-36
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
_base_ = ['./base_dynamic.py', '../../_base_/backends/openvino.py']
2+
3+
onnx_config = dict(input_shape=None)
4+
5+
backend_config = dict(
6+
model_inputs=[dict(opt_shapes=dict(input=[1, 3, 640, 640]))])
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
_base_ = ['../_base_/base_openvino_dynamic-640x640.py']

csrc/mmdeploy/codebase/mmcls/linear_cls.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ class LinearClsHead : public MMClassification {
8585
};
8686

8787
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMClassification, LinearClsHead);
88+
using ConformerHead = LinearClsHead;
89+
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMClassification, ConformerHead);
8890

8991
class CropBox {
9092
public:

mmdeploy/codebase/mmdet/models/dense_heads/detr_head.py

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,6 @@
88
from mmdeploy.core import FUNCTION_REWRITER
99

1010

11-
@FUNCTION_REWRITER.register_rewriter(
12-
'mmdet.models.dense_heads.DETRHead.forward_single')
13-
def detrhead__forward_single__default(self, x, img_metas):
14-
"""forward_single of DETRHead.
15-
16-
Ease the mask computation
17-
"""
18-
19-
batch_size = x.size(0)
20-
21-
x = self.input_proj(x)
22-
# interpolate masks to have the same spatial shape with x
23-
masks = x.new_zeros((batch_size, x.size(-2), x.size(-1))).to(torch.bool)
24-
25-
# position encoding
26-
pos_embed = self.positional_encoding(masks) # [bs, embed_dim, h, w]
27-
# outs_dec: [nb_dec, bs, num_query, embed_dim]
28-
outs_dec, _ = self.transformer(x, masks, self.query_embedding.weight,
29-
pos_embed)
30-
all_cls_scores = self.fc_cls(outs_dec)
31-
all_bbox_preds = self.fc_reg(self.activate(
32-
self.reg_ffn(outs_dec))).sigmoid()
33-
return all_cls_scores, all_bbox_preds
34-
35-
3611
@FUNCTION_REWRITER.register_rewriter(
3712
'mmdet.models.dense_heads.DETRHead.predict_by_feat')
3813
def detrhead__predict_by_feat__default(self,
@@ -42,8 +17,8 @@ def detrhead__predict_by_feat__default(self,
4217
rescale: bool = True):
4318
"""Rewrite `predict_by_feat` of `DETRHead` for default backend."""
4419
from mmdet.structures.bbox import bbox_cxcywh_to_xyxy
45-
cls_scores = all_cls_scores_list[-1][-1]
46-
bbox_preds = all_bbox_preds_list[-1][-1]
20+
cls_scores = all_cls_scores_list[-1]
21+
bbox_preds = all_bbox_preds_list[-1]
4722

4823
img_shape = batch_img_metas[0]['img_shape']
4924
max_per_img = self.test_cfg.get('max_per_img', len(cls_scores[0]))
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Copyright (c) OpenMMLab. All rights reserved.
2-
from . import single_stage, single_stage_instance_seg, two_stage
2+
from . import base_detr, single_stage, single_stage_instance_seg, two_stage
33

4-
__all__ = ['single_stage', 'single_stage_instance_seg', 'two_stage']
4+
__all__ = [
5+
'base_detr', 'single_stage', 'single_stage_instance_seg', 'two_stage'
6+
]
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (c) OpenMMLab. All rights reserved.
2+
import copy
3+
4+
import torch
5+
from mmdet.models.detectors.base import ForwardResults
6+
from mmdet.structures import DetDataSample
7+
from mmdet.structures.det_data_sample import OptSampleList
8+
9+
from mmdeploy.core import FUNCTION_REWRITER, mark
10+
from mmdeploy.utils import is_dynamic_shape
11+
12+
13+
@mark('detr_predict', inputs=['input'], outputs=['dets', 'labels', 'masks'])
14+
def __predict_impl(self, batch_inputs, data_samples, rescale):
15+
"""Rewrite `predict` and add a mark for it.
16+
17+
Encapsulate this function for rewriting `predict` of DetectionTransformer.
18+
1. Add mark for DetectionTransformer.
19+
2. Support both dynamic and static export to onnx.
20+
"""
21+
img_feats = self.extract_feat(batch_inputs)
22+
head_inputs_dict = self.forward_transformer(img_feats, data_samples)
23+
results_list = self.bbox_head.predict(
24+
**head_inputs_dict, rescale=rescale, batch_data_samples=data_samples)
25+
return results_list
26+
27+
28+
@torch.fx.wrap
29+
def _set_metainfo(data_samples, img_shape):
30+
"""Set the metainfo.
31+
32+
Code in this function cannot be traced by fx.
33+
"""
34+
35+
# fx can not trace deepcopy correctly
36+
data_samples = copy.deepcopy(data_samples)
37+
if data_samples is None:
38+
data_samples = [DetDataSample()]
39+
40+
# note that we can not use `set_metainfo`, deepcopy would crash the
41+
# onnx trace.
42+
for data_sample in data_samples:
43+
data_sample.set_field(
44+
name='img_shape', value=img_shape, field_type='metainfo')
45+
46+
return data_samples
47+
48+
49+
@FUNCTION_REWRITER.register_rewriter(
50+
'mmdet.models.detectors.base_detr.DetectionTransformer.predict')
51+
def detection_transformer__predict(self,
52+
batch_inputs: torch.Tensor,
53+
data_samples: OptSampleList = None,
54+
rescale: bool = True,
55+
**kwargs) -> ForwardResults:
56+
"""Rewrite `predict` for default backend.
57+
58+
Support configured dynamic/static shape for model input and return
59+
detection result as Tensor instead of numpy array.
60+
61+
Args:
62+
batch_inputs (Tensor): Inputs with shape (N, C, H, W).
63+
data_samples (List[:obj:`DetDataSample`]): The Data
64+
Samples. It usually includes information such as
65+
`gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.
66+
rescale (bool): Whether to rescale the results.
67+
68+
Returns:
69+
tuple[Tensor]: Detection results of the
70+
input images.
71+
- dets (Tensor): Classification bboxes and scores.
72+
Has a shape (num_instances, 5)
73+
- labels (Tensor): Labels of bboxes, has a shape
74+
(num_instances, ).
75+
"""
76+
ctx = FUNCTION_REWRITER.get_context()
77+
78+
deploy_cfg = ctx.cfg
79+
80+
# get origin input shape as tensor to support onnx dynamic shape
81+
is_dynamic_flag = is_dynamic_shape(deploy_cfg)
82+
img_shape = torch._shape_as_tensor(batch_inputs)[2:]
83+
if not is_dynamic_flag:
84+
img_shape = [int(val) for val in img_shape]
85+
86+
# set the metainfo
87+
data_samples = _set_metainfo(data_samples, img_shape)
88+
89+
return __predict_impl(self, batch_inputs, data_samples, rescale)

mmdeploy/pytorch/functions/interpolate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def interpolate__tensorrt(
8181
size: Optional[Union[int, Tuple[int], Tuple[int, int], Tuple[int, int,
8282
int]]] = None,
8383
scale_factor: Optional[Union[float, Tuple[float]]] = None,
84-
mode: str = 'bilinear',
84+
mode: str = 'nearest',
8585
align_corners: Optional[bool] = None,
8686
recompute_scale_factor: Optional[bool] = None,
8787
):

tests/regression/mmdet.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,9 @@ models:
250250
- *pipeline_ort_dynamic_fp32
251251
- *pipeline_trt_dynamic_fp32
252252
- *pipeline_ncnn_static_fp32
253-
- *pipeline_openvino_dynamic_fp32
253+
- deploy_config: configs/mmdet/detection/detection_openvino_dynamic-640x640.py
254+
convert_image: *convert_image
255+
backend_test: False
254256

255257
- name: Faster R-CNN
256258
metafile: configs/faster_rcnn/metafile.yml
@@ -298,7 +300,10 @@ models:
298300
- configs/detr/detr_r50_8xb2-150e_coco.py
299301
pipelines:
300302
- *pipeline_ort_dynamic_fp32
301-
- *pipeline_trt_dynamic_fp16
303+
- deploy_config: configs/mmdet/detection/detection_tensorrt-fp16_dynamic-64x64-800x800.py
304+
convert_image: *convert_image
305+
backend_test: *default_backend_test
306+
sdk_config: *sdk_dynamic
302307

303308
- name: CenterNet
304309
metafile: configs/centernet/metafile.yml
@@ -335,7 +340,7 @@ models:
335340
- configs/rtmdet/rtmdet_s_8xb32-300e_coco.py
336341
pipelines:
337342
- *pipeline_ort_dynamic_fp32
338-
- deploy_config: configs/mmdet/detection/detection_tensorrt_static-640x640.py
343+
- deploy_config: configs/mmdet/detection/detection_tensorrt_dynamic-64x64-800x800.py
339344
convert_image: *convert_image
340345
backend_test: *default_backend_test
341346
sdk_config: *sdk_dynamic

tests/test_codebase/test_mmcls/test_mmcls_models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ def get_invertedresidual_model():
2929
return model
3030

3131

32+
def get_fcuup_model():
33+
from mmcls.models.backbones.conformer import FCUUp
34+
model = FCUUp(16, 16, 16)
35+
36+
model.requires_grad_(False)
37+
return model
38+
39+
3240
def get_vit_backbone():
3341
from mmcls.models.classifiers.image import ImageClassifier
3442
model = ImageClassifier(
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
{
2+
"type": "DETR",
3+
"num_queries": 100,
4+
"data_preprocessor": {
5+
"type": "DetDataPreprocessor",
6+
"mean": [123.675, 116.28, 103.53],
7+
"std": [58.395, 57.12, 57.375],
8+
"bgr_to_rgb": true,
9+
"pad_size_divisor": 1
10+
},
11+
"backbone": {
12+
"type": "ResNet",
13+
"depth": 50,
14+
"num_stages": 4,
15+
"out_indices": [3],
16+
"frozen_stages": 1,
17+
"norm_cfg": {
18+
"type": "BN",
19+
"requires_grad": false
20+
},
21+
"norm_eval": true,
22+
"style": "pytorch",
23+
"init_cfg": {
24+
"type": "Pretrained",
25+
"checkpoint": "torchvision://resnet50"
26+
}
27+
},
28+
"neck": {
29+
"type": "ChannelMapper",
30+
"in_channels": [2048],
31+
"kernel_size": 1,
32+
"out_channels": 256,
33+
"num_outs": 1
34+
},
35+
"encoder": {
36+
"num_layers": 6,
37+
"layer_cfg": {
38+
"self_attn_cfg": {
39+
"embed_dims": 256,
40+
"num_heads": 8,
41+
"dropout": 0.1,
42+
"batch_first": true
43+
},
44+
"ffn_cfg": {
45+
"embed_dims": 256,
46+
"feedforward_channels": 2048,
47+
"num_fcs": 2,
48+
"ffn_drop": 0.1,
49+
"act_cfg": {
50+
"type": "ReLU",
51+
"inplace": true
52+
}
53+
}
54+
}
55+
},
56+
"decoder": {
57+
"num_layers": 6,
58+
"layer_cfg": {
59+
"self_attn_cfg": {
60+
"embed_dims": 256,
61+
"num_heads": 8,
62+
"dropout": 0.1,
63+
"batch_first": true
64+
},
65+
"cross_attn_cfg": {
66+
"embed_dims": 256,
67+
"num_heads": 8,
68+
"dropout": 0.1,
69+
"batch_first": true
70+
},
71+
"ffn_cfg": {
72+
"embed_dims": 256,
73+
"feedforward_channels": 2048,
74+
"num_fcs": 2,
75+
"ffn_drop": 0.1,
76+
"act_cfg": {
77+
"type": "ReLU",
78+
"inplace": true
79+
}
80+
}
81+
},
82+
"return_intermediate": true
83+
},
84+
"positional_encoding": {
85+
"num_feats": 128,
86+
"normalize": true
87+
},
88+
"bbox_head": {
89+
"type": "DETRHead",
90+
"num_classes": 80,
91+
"embed_dims": 256,
92+
"loss_cls": {
93+
"type": "CrossEntropyLoss",
94+
"bg_cls_weight": 0.1,
95+
"use_sigmoid": false,
96+
"loss_weight": 1.0,
97+
"class_weight": 1.0
98+
},
99+
"loss_bbox": {
100+
"type": "L1Loss",
101+
"loss_weight": 5.0
102+
},
103+
"loss_iou": {
104+
"type": "GIoULoss",
105+
"loss_weight": 2.0
106+
}
107+
},
108+
"train_cfg": {
109+
"assigner": {
110+
"type":
111+
"HungarianAssigner",
112+
"match_costs": [{
113+
"type": "ClassificationCost",
114+
"weight": 1.0
115+
}, {
116+
"type": "BBoxL1Cost",
117+
"weight": 5.0,
118+
"box_format": "xywh"
119+
}, {
120+
"type": "IoUCost",
121+
"iou_mode": "giou",
122+
"weight": 2.0
123+
}]
124+
}
125+
},
126+
"test_cfg": {
127+
"max_per_img": 100
128+
}
129+
}

0 commit comments

Comments
 (0)