# config_rtdetrwithrepvgg.py

auto_scale_lr = dict(base_batch_size=32)
backend_args = None
image_size = (480, 480)
pretrained = 'https://github.com/flytocc/mmdetection/releases/download/model_zoo/resnet34vd_pretrained_f6a72dc5.pth'
data_root = '../../../media/tricolops/T7/Dataset/coco_format_bd/'
dataset_type = 'CocoDataset'
interpolations = ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']
default_hooks = dict(
    checkpoint=dict(interval=5, save_best='auto', type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49)
]
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
max_epochs = 300
metainfo = dict(
    classes=('barcode', ),
    palette=[
        (220, 20, 60),
    ])
model = dict(
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                random_size_range=(384, 576),
                size_divisor=32,
                interval=1)
        ]),
    backbone=dict(
        arch='A0',
        in_channels=1,
        base_channels=64,
        out_indices=(1, 2, 3),
        strides=(1, 2, 2, 2),
        dilations=(1, 1, 1, 1),
        frozen_stages=-1,
        norm_cfg=dict(requires_grad=True, type='BN'),
        # norm params receive gradients, which is wanted when training without pretrained weights
        norm_eval=False,
        type='RepVGG'),
    bbox_head=dict(
        num_classes=1,
        embed_dims=256,
        # must equal the hidden dims of the decoder and the HybridEncoder
        num_reg_fcs=2,
        sync_cls_avg_factor=False,
        loss_cls=dict(
            type='VarifocalLoss',
            use_sigmoid=True,
            alpha=0.75,
            gamma=2.0,
            with_logits=True,
            iou_weighted=True,
            reduction='mean',
            loss_weight=1),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
        num_pred_layer=6,
        type='RTDETRHead'),
    decoder=dict(
        num_classes=1,
        hidden_dim=256,
        num_queries=300,
        position_type='sine',
        feat_channels=[256, 256, 256],
        # every level carries 256 channels after the neck
        feat_strides=[8, 16, 32],
        # eval spatial size must be consistent with the encoder feature strides
        num_levels=3,
        num_crossattn_points=8,
        # number of sampling points in deformable cross-attention
        number_head=8,
        number_decoder_layer=3,
        # 4 decoder layers for the ResNet-34 variant, 6 for ResNet-50
        dim_feedforward_ratio=4,
        dropout=0.0,
        act_cfg=dict(type='ReLU', inplace=True),
        num_denoising=100,
        label_noise_ratio=0.5,
        box_noise_scale=1.0,
        learnt_init_query=True,
        eval_idx=-1,
        eval_spatial_size=[480, 480],
        eps=1e-2),
    neck=dict(
        in_channels=[128, 256, 512],
        # must match the channels of the selected backbone stages:
        # [128, 256, 512] for ResNet-34, [512, 1024, 2048] for ResNet-50
        feat_strides=[8, 16, 32],
        hidden_dim=256,
        n_head=8,
        dim_feedforward_ratio=4,
        drop_out=0.0,
        enc_act=dict(type='GELU'),
        use_encoder_idx=[2],
        num_encoder_layers=1,
        pe_temperature=100 * 100,
        with_spd=False,
        norm_cfg=dict(type='BN', requires_grad=True),
        widen_factor=0.5,
        deepen_factor=1,
        input_proj_cfg=dict(
            type='ChannelMapper',
            in_channels=[128, 256, 512],
            # same as the encoder input channels above
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='BN', requires_grad=True),
            num_outs=3),
        eval_spatial_size=[480, 480],
        act_cfg=dict(type='SiLU', inplace=True),
        type='HybridEncoder'),
    num_queries=300,
    test_cfg=dict(max_per_img=50),
    train_cfg=dict(
        assigner=dict(
            match_costs=[
                dict(type='FocalLossCost', weight=2.0),
                dict(box_format='xywh', type='BBoxL1Cost', weight=5.0),
                dict(iou_mode='giou', type='IoUCost', weight=2.0),
            ],
            type='HungarianAssigner')),
    type='RtDetr')
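# Optional sanity check, kept as comments so that loading this config stays
# side-effect free. A minimal sketch assuming an MMDetection 3.x environment in
# which the custom RtDetr / RTDETRHead / HybridEncoder / RepVGG modules used
# above have been registered:
#
#   from mmengine.config import Config
#   from mmdet.registry import MODELS
#   from mmdet.utils import register_all_modules
#
#   register_all_modules()
#   cfg = Config.fromfile('config_rtdetrwithrepvgg.py')
#   detector = MODELS.build(cfg.model)  # fails early if channel/dim settings disagree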
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2),
    optimizer=dict(lr=0.0004, type='AdamW', weight_decay=0.0001),
    paramwise_cfg=dict(
        custom_keys={'backbone': dict(lr_mult=0.1)},
        norm_decay_mult=0,
        bypass_duplicate=True),
    type='OptimWrapper')
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=2000)
]
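# Note: only the linear warmup above is scheduled; after the first 2000
# iterations the learning rate stays at the base value (0.0004) for the rest of
# training unless another scheduler (e.g. a step or cosine decay) is appended.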
resume = False
test_cfg = dict(type='TestLoop')
test_pipeline = [
    dict(backend_args=None, color_type='grayscale', type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        scale=image_size,
        keep_ratio=False,
        # interpolation='bicubic'
    ),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val_resize/Val_resize.json',
        backend_args=None,
        data_prefix=dict(img='Val_resize/'),
        data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
        metainfo=dict(classes=('barcode', ), palette=[
            (220, 20, 60),
        ]),
        pipeline=test_pipeline,
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val_resize/Val_resize.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
train_cfg = dict(max_epochs=max_epochs, type='EpochBasedTrainLoop', val_interval=1)
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='grayscale', backend_args=None),
    dict(type='LoadAnnotations', with_bbox=True),
    # dict(
    #     type='RandomApply',
    #     transforms=dict(type='PhotoMetricDistortion'),
    #     prob=0.8),
    dict(
        type='RandomApply',
        transforms=dict(type='MinIoURandomCrop'),
        prob=0.8),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        transforms=[[
            dict(
                type='Resize',
                scale=image_size,
                keep_ratio=False,
                interpolation=interpolation)
        ] for interpolation in interpolations]),
    dict(type='PackDetInputs')
]
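# To visually verify this pipeline (random crop, flip and the per-sample choice
# of resize interpolation), the standard MMDetection dataset browser can be
# used; an illustrative invocation, assuming the usual repository layout:
#
#   python tools/analysis_tools/browse_dataset.py config_rtdetrwithrepvgg.py --output-dir vis_pipeline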
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=16,
    dataset=dict(
        ann_file='Train_resize/Train_resize.json',
        backend_args=None,
        data_prefix=dict(img='Train_resize/'),
        data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        metainfo=dict(classes=('barcode', ), palette=[
            (220, 20, 60),
        ]),
        pipeline=train_pipeline,
        type='CocoDataset'),
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val_resize/Val_resize.json',
        backend_args=None,
        data_prefix=dict(img='Val_resize/'),
        data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
        metainfo=dict(classes=('barcode', ), palette=[
            (220, 20, 60),
        ]),
        pipeline=[
            dict(backend_args=None, color_type='grayscale', type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=image_size, type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val_resize/Val_resize.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])
work_dir = './work_dirs/rtdetrrepvgg'
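# Typical launch commands, assuming the standard MMDetection tool scripts are
# run from the repository root (the checkpoint name below is illustrative):
#
#   python tools/train.py config_rtdetrwithrepvgg.py
#   bash tools/dist_train.sh config_rtdetrwithrepvgg.py 2
#   python tools/test.py config_rtdetrwithrepvgg.py work_dirs/rtdetrrepvgg/epoch_300.pth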