Skip to content

Commit 9824bcc

Browse files
add rotated dino
1 parent 47f9928 commit 9824bcc

14 files changed

Lines changed: 2395 additions & 0 deletions
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
# Rotated DINO on the DIOR dataset: ResNet-50 backbone, 12-epoch schedule.
from torch.optim.adamw import AdamW
from mmengine.config import read_base
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from mmengine.optim.scheduler import MultiStepLR
from mmengine.optim.optimizer import OptimWrapper
from mmdet.models.data_preprocessors import DetDataPreprocessor
from mmdet.models.backbones import ResNet
from mmdet.models.necks import ChannelMapper
from mmdet.models.losses import FocalLoss, L1Loss
from mmdet.models.task_modules import FocalLossCost, HungarianAssigner
from ai4rs.models.losses import GDLoss
from projects.rotated_dino.rotated_dino import RotatedDINO
from projects.rotated_dino.rotated_dino.match_cost import RBoxL1Cost, GDCost
from projects.rotated_dino.rotated_dino.rotated_dino_head import RotatedDINOHead

with read_base():
    from configs._base_.datasets.dior import *
    from configs._base_.default_runtime import *


# Rotated-box angle convention: the longer side is treated as the width
# and angles start from 0.
angle_cfg = dict(
    width_longer=True,
    start_angle=0,
)
# Angle normalization factor (pi); angles are scaled by this value.
angle_factor = 3.1415926535897932384626433832795

model = dict(
    type=RotatedDINO,
    num_queries=900,  # number of matching queries
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=1,
        boxtype2tensor=False),
    backbone=dict(
        type=ResNet,
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type=ChannelMapper,
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    encoder=dict(
        num_layers=6,
        layer_cfg=dict(
            self_attn_cfg=dict(
                embed_dims=256,
                num_levels=4,
                dropout=0.0),  # DeformDETR uses 0.1
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # DeformDETR uses 1024
                ffn_drop=0.0))),  # DeformDETR uses 0.1
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        angle_factor=angle_factor,
        layer_cfg=dict(
            self_attn_cfg=dict(
                embed_dims=256,
                num_heads=8,
                dropout=0.0),  # DeformDETR uses 0.1
            cross_attn_cfg=dict(
                embed_dims=256,
                num_levels=4,
                dropout=0.0),  # DeformDETR uses 0.1
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # DeformDETR uses 1024
                ffn_drop=0.0)),  # DeformDETR uses 0.1
        post_norm_cfg=None),
    positional_encoding=dict(
        num_feats=128,
        normalize=True,
        offset=0.0,  # DeformDETR uses -0.5
        temperature=20),  # DeformDETR uses 10000
    bbox_head=dict(
        type=RotatedDINOHead,
        num_classes=20,  # DIOR has 20 categories
        angle_cfg=angle_cfg,
        angle_factor=angle_factor,
        sync_cls_avg_factor=True,
        loss_cls=dict(
            type=FocalLoss,
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # DeformDETR uses 2.0
        loss_bbox=dict(type=L1Loss, loss_weight=5.0),
        loss_iou=dict(
            type=GDLoss,
            loss_type='kld',
            fun='log1p',
            tau=1,
            sqrt=False,
            loss_weight=2.0)),
    dn_cfg=dict(  # TODO: Move to model.train_cfg ?
        label_noise_scale=0.5,
        box_noise_scale=1.0,  # DN-DETR uses 0.4
        angle_cfg=angle_cfg,
        angle_factor=angle_factor,
        # one of: 'only_xyxy', 'only_angle', 'only_xywh', 'all_xyxya'
        noise_mode='only_xyxy',
        group_cfg=dict(
            dynamic=True,
            num_groups=None,
            num_dn_queries=100)),  # TODO: half num_dn_queries
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type=HungarianAssigner,
            match_costs=[
                dict(type=FocalLossCost, weight=2.0),
                dict(
                    type=RBoxL1Cost,
                    weight=5.0,
                    box_format='xywha',
                    angle_factor=angle_factor),
                dict(
                    type=GDCost,
                    loss_type='kld',
                    fun='log1p',
                    tau=1,
                    sqrt=False,
                    weight=2.0)
            ])),
    test_cfg=dict(max_per_img=500))  # DeformDETR uses 100

# optimizer
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(
        type=AdamW,
        lr=0.0001,  # DeformDETR uses 0.0002
        weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    # DeformDETR additionally lists sampling_offsets and reference_points
    # in custom_keys.  # noqa
    paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)}))

# learning policy
max_epochs = 12
train_cfg = dict(
    type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=2)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

param_scheduler = [
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[11],
        gamma=0.1)
]

# dataset settings
train_dataloader.update(batch_size=4, num_workers=4)
val_dataloader.update(batch_size=4, num_workers=4)
test_dataloader.update(batch_size=4, num_workers=4)

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (2 GPUs) x (4 samples per GPU)
auto_scale_lr = dict(base_batch_size=8, enable=False)
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
# Rotated DINO on the DOTA dataset: ResNet-50 backbone, 12-epoch schedule.
from torch.optim.adamw import AdamW
from mmengine.config import read_base
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from mmengine.optim.scheduler import MultiStepLR
from mmengine.optim.optimizer import OptimWrapper
from mmdet.models.data_preprocessors import DetDataPreprocessor
from mmdet.models.backbones import ResNet
from mmdet.models.necks import ChannelMapper
from mmdet.models.losses import FocalLoss, L1Loss
from mmdet.models.task_modules import FocalLossCost, HungarianAssigner
from ai4rs.models.losses import GDLoss
from projects.rotated_dino.rotated_dino import RotatedDINO
from projects.rotated_dino.rotated_dino.match_cost import RBoxL1Cost, GDCost
from projects.rotated_dino.rotated_dino.rotated_dino_head import RotatedDINOHead

with read_base():
    from configs._base_.datasets.dota import *
    from configs._base_.default_runtime import *


# Rotated-box angle convention: the longer side is treated as the width
# and angles start from 0.
angle_cfg = dict(
    width_longer=True,
    start_angle=0,
)
# Angle normalization factor (pi); angles are scaled by this value.
angle_factor = 3.1415926535897932384626433832795

model = dict(
    type=RotatedDINO,
    num_queries=900,  # number of matching queries
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=1,
        boxtype2tensor=False),
    backbone=dict(
        type=ResNet,
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type=ChannelMapper,
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    encoder=dict(
        num_layers=6,
        layer_cfg=dict(
            self_attn_cfg=dict(
                embed_dims=256,
                num_levels=4,
                dropout=0.0),  # DeformDETR uses 0.1
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # DeformDETR uses 1024
                ffn_drop=0.0))),  # DeformDETR uses 0.1
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        angle_factor=angle_factor,
        layer_cfg=dict(
            self_attn_cfg=dict(
                embed_dims=256,
                num_heads=8,
                dropout=0.0),  # DeformDETR uses 0.1
            cross_attn_cfg=dict(
                embed_dims=256,
                num_levels=4,
                dropout=0.0),  # DeformDETR uses 0.1
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # DeformDETR uses 1024
                ffn_drop=0.0)),  # DeformDETR uses 0.1
        post_norm_cfg=None),
    positional_encoding=dict(
        num_feats=128,
        normalize=True,
        offset=0.0,  # DeformDETR uses -0.5
        temperature=20),  # DeformDETR uses 10000
    bbox_head=dict(
        type=RotatedDINOHead,
        num_classes=15,  # DOTA-v1.0 has 15 categories
        angle_cfg=angle_cfg,
        angle_factor=angle_factor,
        sync_cls_avg_factor=True,
        loss_cls=dict(
            type=FocalLoss,
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # DeformDETR uses 2.0
        loss_bbox=dict(type=L1Loss, loss_weight=5.0),
        loss_iou=dict(
            type=GDLoss,
            loss_type='kld',
            fun='log1p',
            tau=1,
            sqrt=False,
            loss_weight=2.0)),
    dn_cfg=dict(  # TODO: Move to model.train_cfg ?
        label_noise_scale=0.5,
        box_noise_scale=1.0,  # DN-DETR uses 0.4
        angle_cfg=angle_cfg,
        angle_factor=angle_factor,
        # one of: 'only_xyxy', 'only_angle', 'only_xywh', 'all_xyxya'
        noise_mode='only_xyxy',
        group_cfg=dict(
            dynamic=True,
            num_groups=None,
            num_dn_queries=100)),  # TODO: half num_dn_queries
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type=HungarianAssigner,
            match_costs=[
                dict(type=FocalLossCost, weight=2.0),
                dict(
                    type=RBoxL1Cost,
                    weight=5.0,
                    box_format='xywha',
                    angle_factor=angle_factor),
                dict(
                    type=GDCost,
                    loss_type='kld',
                    fun='log1p',
                    tau=1,
                    sqrt=False,
                    weight=2.0)
            ])),
    test_cfg=dict(max_per_img=500))  # DeformDETR uses 100

# optimizer
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(
        type=AdamW,
        lr=0.0001,  # DeformDETR uses 0.0002
        weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    # DeformDETR additionally lists sampling_offsets and reference_points
    # in custom_keys.  # noqa
    paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)}))

# learning policy
max_epochs = 12
train_cfg = dict(
    type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=2)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

param_scheduler = [
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[11],
        gamma=0.1)
]

# dataset settings
train_dataloader.update(batch_size=4, num_workers=4)
val_dataloader.update(batch_size=4, num_workers=4)
test_dataloader.update(batch_size=4, num_workers=4)

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (2 GPUs) x (4 samples per GPU)
auto_scale_lr = dict(base_batch_size=8, enable=False)

0 commit comments

Comments
 (0)