import math

from torch.optim.adamw import AdamW

from mmengine.config import read_base
from mmengine.optim.optimizer import OptimWrapper
from mmengine.optim.scheduler import MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop

from mmdet.models.backbones import ResNet
from mmdet.models.data_preprocessors import DetDataPreprocessor
from mmdet.models.losses import FocalLoss, L1Loss
from mmdet.models.necks import ChannelMapper
from mmdet.models.task_modules import FocalLossCost, HungarianAssigner

from ai4rs.models.losses import GDLoss
from projects.rotated_dino.rotated_dino import RotatedDINO
from projects.rotated_dino.rotated_dino.match_cost import RBoxL1Cost, GDCost
from projects.rotated_dino.rotated_dino.rotated_dino_head import RotatedDINOHead
15+
# Inherit the DIOR dataset pipeline and the default runtime settings from
# the base configs (mmengine new-style config inheritance).
with read_base():
    from configs._base_.datasets.dior import *
    from configs._base_.default_runtime import *
19+
20+
# Shared rotated-box angle convention: treat the longer side as the width
# and start angles at 0.
angle_cfg = dict(
    width_longer=True,
    start_angle=0,
)
# Scale factor applied to the normalized angle prediction (radians).
# `math.pi` is bit-identical to the previous hand-typed 31-digit literal
# and removes the transcription risk.
angle_factor = math.pi
26+
# Rotated-DINO detector: ResNet-50 backbone, channel-mapper neck, 6-layer
# deformable encoder/decoder, and a rotated-box DINO head trained with
# denoising queries and Hungarian matching.
model = dict(
    type=RotatedDINO,
    num_queries=900,  # number of matching queries
    with_box_refine=True,
    as_two_stage=True,
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=1,
        boxtype2tensor=False),
    backbone=dict(
        type=ResNet,
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type=ChannelMapper,
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    encoder=dict(
        num_layers=6,
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_levels=4,
                               dropout=0.0),  # 0.1 for DeformDETR
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # 1024 for DeformDETR
                ffn_drop=0.0))),  # 0.1 for DeformDETR
    decoder=dict(
        num_layers=6,
        return_intermediate=True,
        angle_factor=angle_factor,
        layer_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_heads=8,
                               dropout=0.0),  # 0.1 for DeformDETR
            cross_attn_cfg=dict(embed_dims=256, num_levels=4,
                                dropout=0.0),  # 0.1 for DeformDETR
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=2048,  # 1024 for DeformDETR
                ffn_drop=0.0)),  # 0.1 for DeformDETR
        post_norm_cfg=None),
    positional_encoding=dict(
        num_feats=128,
        normalize=True,
        offset=0.0,  # -0.5 for DeformDETR
        temperature=20),  # 10000 for DeformDETR
    bbox_head=dict(
        type=RotatedDINOHead,
        num_classes=20,
        angle_cfg=angle_cfg,
        angle_factor=angle_factor,
        sync_cls_avg_factor=True,
        loss_cls=dict(
            type=FocalLoss,
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),  # 2.0 in DeformDETR
        loss_bbox=dict(type=L1Loss, loss_weight=5.0),
        loss_iou=dict(
            type=GDLoss,
            loss_type='kld',
            fun='log1p',
            tau=1,
            sqrt=False,
            loss_weight=2.0)),
    # Denoising-query settings.  TODO: move into model.train_cfg?
    dn_cfg=dict(
        label_noise_scale=0.5,
        box_noise_scale=1.0,  # 0.4 for DN-DETR
        angle_cfg=angle_cfg,
        angle_factor=angle_factor,
        # one of: 'only_xyxy', 'only_angle', 'only_xywh', 'all_xyxya'
        noise_mode='only_xyxy',
        group_cfg=dict(dynamic=True, num_groups=None,
                       num_dn_queries=100)),  # TODO: half num_dn_queries
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type=HungarianAssigner,
            match_costs=[
                dict(type=FocalLossCost, weight=2.0),
                dict(
                    type=RBoxL1Cost,
                    weight=5.0,
                    box_format='xywha',
                    angle_factor=angle_factor),
                dict(
                    type=GDCost,
                    loss_type='kld',
                    fun='log1p',
                    tau=1,
                    sqrt=False,
                    weight=2.0)
            ])),
    test_cfg=dict(max_per_img=500))  # 100 for DeformDETR
133+
# Optimizer: AdamW with gradient clipping; the backbone trains at a 10x
# lower learning rate than the transformer.
# In DeformDETR, custom_keys additionally covers sampling_offsets and
# reference_points.  # noqa
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(
        type=AdamW,
        lr=0.0001,  # 0.0002 for DeformDETR
        weight_decay=0.0001),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    paramwise_cfg=dict(custom_keys={'backbone': dict(lr_mult=0.1)}))
144+
# Learning policy: 12-epoch schedule, validating every second epoch.
max_epochs = 12
train_cfg = dict(
    type=EpochBasedTrainLoop, max_epochs=max_epochs, val_interval=2)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
151+
# Step schedule: decay the learning rate by 10x after epoch 11.
param_scheduler = [
    dict(
        type=MultiStepLR,
        begin=0,
        end=max_epochs,
        by_epoch=True,
        milestones=[11],
        gamma=0.1)
]
161+
# Dataset settings: every split uses 4 samples and 4 workers per GPU.
_loader_override = dict(batch_size=4, num_workers=4)
train_dataloader.update(_loader_override)
val_dataloader.update(_loader_override)
test_dataloader.update(_loader_override)
166+
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (2 GPUs) x (4 samples per GPU)
auto_scale_lr = dict(base_batch_size=8, enable=False)