# config.py

auto_scale_lr = dict(base_batch_size=32)
backend_args = None
image_size = (480, 480)
pretrained = 'https://github.com/flytocc/mmdetection/releases/download/model_zoo/resnet34vd_pretrained_f6a72dc5.pth'
data_root = '../../../media/tricolops/T7/Dataset/coco_format_bd/'
dataset_type = 'CocoDataset'
interpolations = ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']
default_hooks = dict(
    checkpoint=dict(interval=5, save_best='auto', type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49)
]
default_scope = 'mmdet'
env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
max_epochs = 300
metainfo = dict(
    classes=('barcode', ),
    palette=[
        (220, 20, 60),
    ])
model = dict(
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                random_size_range=(384, 576),
                size_divisor=32,
                interval=1)
        ]),
    backbone=dict(
        depth=18,
        frozen_stages=-1,  # 0 for ResNet-50, -1 for ResNet-34
        # init_cfg=dict(checkpoint='torchvision://resnet34', type='Pretrained'),
        norm_cfg=dict(requires_grad=True, type='BN'),
        # grads are updated when not using pretrained weights
        norm_eval=False,
        num_stages=4,
        out_indices=(1, 2, 3),
        style='pytorch',
        type='ResNet'),
    bbox_head=dict(
        num_classes=1,
        embed_dims=256,  # same as the hidden dims in the decoder and HybridEncoder
        num_reg_fcs=2,
        sync_cls_avg_factor=False,
        loss_cls=dict(
            type='VarifocalLoss',
            use_sigmoid=True,
            alpha=0.75,
            gamma=2.0,
            with_logits=True,
            iou_weighted=True,
            reduction='mean',
            loss_weight=1),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
        num_pred_layer=6,
        type='RTDETRHead'),
    decoder=dict(
        num_classes=1,
        hidden_dim=256,
        num_queries=300,
        position_type='sine',
        feat_channels=[256, 256, 256],  # after the neck the channels are 256
        feat_strides=[8, 16, 32],  # eval size should match the encoder feat strides
        num_levels=3,
        num_crossattn_points=8,  # number of cross-attention sampling points
        number_head=8,
        number_decoder_layer=3,  # 4 for ResNet-34 and 6 for ResNet-50
        dim_feedforward_ratio=4,
        dropout=0.0,
        act_cfg=dict(type='ReLU', inplace=True),
        num_denoising=100,
        label_noise_ratio=0.5,
        box_noise_scale=1.0,
        learnt_init_query=True,
        eval_idx=-1,
        eval_spatial_size=[480, 480],
        eps=1e-2),
    neck=dict(
        # same as the backbone output channels (last three stages):
        # [128, 256, 512] for ResNet-34 and [512, 1024, 2048] for ResNet-50
        in_channels=[128, 256, 512],
        feat_strides=[8, 16, 32],
        hidden_dim=256,
        n_head=8,
        dim_feedforward_ratio=4,
        drop_out=0.0,
        enc_act=dict(type='GELU'),
        use_encoder_idx=[2],
        num_encoder_layers=1,
        pe_temperature=100 * 100,
        with_spd=False,
        norm_cfg=dict(type='BN', requires_grad=True),
        widen_factor=0.5,
        deepen_factor=1,
        input_proj_cfg=dict(
            type='ChannelMapper',
            in_channels=[128, 256, 512],  # same as the encoder input channels
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='BN', requires_grad=True),
            num_outs=3),
        eval_spatial_size=[480, 480],
        act_cfg=dict(type='SiLU', inplace=True),
        type='HybridEncoder'),
    num_queries=300,
    test_cfg=dict(max_per_img=50),
    train_cfg=dict(
        assigner=dict(
            match_costs=[
                dict(type='FocalLossCost', weight=2.0),
                dict(box_format='xywh', type='BBoxL1Cost', weight=5.0),
                dict(iou_mode='giou', type='IoUCost', weight=2.0),
            ],
            type='HungarianAssigner')),
    type='RtDetr')
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2),
    optimizer=dict(lr=0.0004, type='AdamW', weight_decay=0.0001),
    paramwise_cfg=dict(
        custom_keys={'backbone': dict(lr_mult=0.1)},
        norm_decay_mult=0,
        bypass_duplicate=True),
    type='OptimWrapper')
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=2000)
]
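# Note on the learning rate: if MMDetection's `--auto-scale-lr` option is used,
# the base lr above is expected to be rescaled by
# total_batch_size / auto_scale_lr.base_batch_size. Assuming a single GPU with
# train_dataloader.batch_size = 12, that gives roughly
# 0.0004 * 12 / 32 = 0.00015; without the flag the lr stays at 0.0004.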
resume = False
test_cfg = dict(type='TestLoop')
test_pipeline = [
    dict(backend_args=None, color_type='grayscale', type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        scale=image_size,
        keep_ratio=False,
        # interpolation='bicubic'
    ),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
test_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val/Val.json',
        backend_args=None,
        data_prefix=dict(img='Val/'),
        data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
        metainfo=dict(classes=('barcode', ), palette=[
            (220, 20, 60),
        ]),
        pipeline=test_pipeline,
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
    ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val/Val.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
train_cfg = dict(
    max_epochs=max_epochs, type='EpochBasedTrainLoop', val_interval=1)
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='grayscale', backend_args=None),
    dict(type='LoadAnnotations', with_bbox=True),
    # dict(
    #     type='RandomApply',
    #     transforms=dict(type='PhotoMetricDistortion'),
    #     prob=0.8),
    dict(
        type='RandomApply',
        transforms=dict(type='MinIoURandomCrop'),
        prob=0.8),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='RandomChoice',
        transforms=[[
            dict(
                type='Resize',
                scale=image_size,
                keep_ratio=False,
                interpolation=interpolation)
        ] for interpolation in interpolations]),
    dict(type='PackDetInputs')
]
train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=12,
    dataset=dict(
        ann_file='Train/Train.json',
        backend_args=None,
        data_prefix=dict(img='Train/'),
        data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        metainfo=dict(classes=('barcode', ), palette=[
            (220, 20, 60),
        ]),
        pipeline=train_pipeline,
        type='CocoDataset'),
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='Val/Val.json',
        backend_args=None,
        data_prefix=dict(img='Val/'),
        data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
        metainfo=dict(classes=('barcode', ), palette=[
            (220, 20, 60),
        ]),
        pipeline=[
            dict(
                backend_args=None,
                color_type='grayscale',
                type='LoadImageFromFile'),
            dict(keep_ratio=False, scale=image_size, type='Resize'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                           'scale_factor'),
                type='PackDetInputs'),
        ],
        test_mode=True,
        type='CocoDataset'),
    drop_last=False,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
    ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val/Val.json',
    backend_args=None,
    format_only=False,
    metric='bbox',
    type='CocoMetric')
vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])
work_dir = './work_dirs/rtdetr'
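
# Usage sketch, assuming the standard MMDetection entry points; the config
# path and checkpoint name below are illustrative placeholders:
#   single-GPU training:
#     python tools/train.py path/to/this/config.py --work-dir ./work_dirs/rtdetr
#   evaluation on the Val split with a trained checkpoint:
#     python tools/test.py path/to/this/config.py ./work_dirs/rtdetr/epoch_300.pth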