Description
I get this error message: '"binary marks" must have the same shape as the image', when I run inference with the trained model. It's an RTMDet model that I trained with mmdetection 3.x. What could be the possible causes for the model to predict a mask with a different dimension than the image?
This is my training config:
# Base config + COCO-pretrained checkpoint for RTMDet-Ins (medium).
# NOTE(review): MMEngine's classic python configs inherit via the magic
# variable `_base_` (and reference base values as `{{_base_.train_pipeline}}`);
# a plain `base` str variable is not treated specially, and the later
# `base.train_pipeline` attribute accesses would fail on a str. Presumably this
# file targets the new-style `read_base()` mechanism — verify how it is loaded.
base = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/mmdetection/configs/rtmdet/rtmdet-ins_m_8xb32-300e_coco.py'
load_from = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet-ins_m_8xb32-300e_coco_20221123_001039-6eba602e.pth'
work_dir = '/home/erik/Riksarkivet/Projects/HTR_Pipeline/models/checkpoints/rtmdet_regions_6'
base_lr = 0.004 / 16  # base LR of the 8xb32 recipe, scaled down for this setup
train_batch_size_per_gpu = 2
val_batch_size_per_gpu = 1
train_num_workers = 1
num_classes = 1
metainfo = {
    # BUG FIX: ('TextRegion') is just a parenthesised string, not a tuple —
    # MMDet would then iterate it character by character as class names.
    # A one-element tuple requires a trailing comma.
    'classes': ('TextRegion',),
    'palette': [
        (220, 20, 60),
    ],
}
# Shrink the base RTMDet-Ins head to a single 'TextRegion' class.
# NOTE(review): this module-level assignment is REPLACED by the second
# `model = dict(test_cfg=...)` further down in this file — in plain Python the
# later assignment wins, so `num_classes=1` is silently lost and the model
# keeps the base (80-class COCO) head config. Merge both dicts into one
# `model` assignment.
model = dict(bbox_head=dict(num_classes=1))
# ICDAR 2019 training split (COCO-format region annotations).
# NOTE(review): `base.train_pipeline` assumes `base` is an importable config
# module (new-style configs); with the plain str `base` above this would raise
# AttributeError — confirm the config loading mechanism.
icdar_2019 = dict(
type='CocoDataset',
metainfo=metainfo,
data_prefix=dict(img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'),
ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
pipeline=base.train_pipeline
)
# ICDAR 2019 used as a test split.
# NOTE(review): same images + annotation file as the training split above, so
# test metrics will be computed on training data — verify this is intentional.
icdar_2019_test = dict(
type='CocoDataset',
metainfo=metainfo,
data_prefix=dict(img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/'),
ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/ICDAR-2019/clean/gt_files/coco_regions2.json',
test_mode=True,
pipeline=base.test_pipeline
)
# Police-records dataset (COCO-format region annotations), training pipeline.
police_records = dict(
type='CocoDataset',
metainfo=metainfo,
data_prefix=dict(img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'),
ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
pipeline=base.train_pipeline
)
# Training concatenates both datasets; testing uses only ICDAR 2019.
train_list = [police_records, icdar_2019]
test_list = [icdar_2019_test]
# Training dataloader: shuffled ConcatDataset over `train_list`.
train_dataloader = dict(
batch_size=train_batch_size_per_gpu,
num_workers=train_num_workers,
# keep worker processes alive between epochs to avoid respawn overhead
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=train_list,
))
# Validation dataloader: police-records split, one image per batch.
# NOTE(review): batch_size is hard-coded to 1 here instead of using the
# `val_batch_size_per_gpu` variable defined above — presumably they should
# agree; verify.
val_dataloader = dict(
batch_size=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='CocoDataset',
metainfo=metainfo,
data_prefix=dict(img='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/'),
ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json',
pipeline=base.test_pipeline,
test_mode=True
))
# Test uses the same dataloader as validation.
test_dataloader = val_dataloader
# COCO-style evaluation of both boxes and segmentation masks; the ann_file
# must match the one used by `val_dataloader` above.
val_evaluator = dict(
type='CocoMetric',
metric=['bbox', 'segm'],
ann_file='/media/erik/Elements/Riksarkivet/data/datasets/htr/segmentation/police_records/gt_files/coco_regions2.json'
)
test_evaluator = val_evaluator
# Inference-time settings for the RTMDet-Ins head.
# BUG FIX: this second module-level `model = dict(...)` previously REPLACED
# the earlier `model = dict(bbox_head=dict(num_classes=1))` assignment (plain
# Python variable rebinding — no config merging happens within one file),
# silently dropping the single-class head override. Both overrides are merged
# into one dict here.
model = dict(
    bbox_head=dict(num_classes=1),
    test_cfg=dict(
        nms_pre=200,           # keep at most 200 proposals before NMS
        min_bbox_size=0,
        score_thr=0.4,         # drop detections below 0.4 confidence
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=50,        # at most 50 instances kept per image
        mask_thr_binary=0.5))  # threshold for binarising predicted masks
# loss_cls is dynamically adjusted based on num_classes, but when
# num_classes = 1, loss_cls is always 0
default_hooks = dict(
    # BUG FIX: the original comment here wrapped onto a bare (un-commented)
    # line, which is a SyntaxError when the config is parsed.
    # Save a checkpoint every `interval` epochs, keep at most `max_keep_ckpts`
    # of them, and additionally save the best model; 'auto' picks the first
    # metric reported by the evaluator.
    checkpoint=dict(
        type='CheckpointHook',
        interval=1,
        max_keep_ckpts=5,
        save_best='auto'),
    # logger output interval (iterations)
    logger=dict(type='LoggerHook', interval=100))
max_epochs = 12
stage2_num_epochs = 2  # final epochs run with the stage-2 (weaker-aug) recipe
interval = 12          # validation interval in epochs
train_cfg = dict(
    # CONSISTENCY FIX: these were hard-coded 12s duplicating the variables
    # above; using the variables keeps them from drifting apart.
    max_epochs=max_epochs,
    val_interval=interval,
    # switch to validating every epoch for the last `stage2_num_epochs` epochs
    dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
# NOTE(review): in MMEngine, top-level `test_cfg` is the *test loop* config
# (e.g. dict(type='TestLoop')); a `pipeline` key here is not a loop option —
# presumably this was meant for the test dataset/dataloader. Verify against
# the base config before relying on it.
test_cfg = dict(pipeline=base.test_pipeline)
# NOTE(review): this bare module-level `pipeline` variable is not referenced
# by any dataloader in this file; it looks like dead config — confirm before
# removing.
pipeline=base.test_pipeline
# Replace (not merge with) the base config's optimizer wrapper.
# BUG FIX: the MMEngine magic key is `_delete_`, not `delete` — a plain
# `delete=True` is passed through as an ordinary (unknown) kwarg and the dict
# is still merged with the base `optim_wrapper`.
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    # do not decay norm-layer weights or biases; tolerate duplicated params
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# LR schedule: linear warmup for the first 1000 iterations, then cosine decay
# over the second half of training.
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
# cosine decay from epoch max_epochs//2 to max_epochs (6 -> 12 here);
# the original comment ("150 to 300 epoch") was copied from the 300-epoch
# base recipe and did not match this config.
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# Visualisation: render predictions with the detection visualizer and write
# them to the local filesystem only (no wandb/tensorboard backends).
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='DetLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
    save_dir='/home/erik/Riksarkivet/Projects/HTR_Pipeline/output')
Grateful for any help!