# config_rtdetrwithyolo.py (9.1 KB)

  1. auto_scale_lr = dict(base_batch_size=32)
  2. backend_args = None
  3. pretrained = 'https://github.com/flytocc/mmdetection/releases/download/model_zoo/resnet34vd_pretrained_f6a72dc5.pth'
  4. data_root = '../../../media/tricolops/T7/Dataset/coco_format_bd/'
  5. dataset_type = 'CocoDataset'
  6. interpolations = ['nearest', 'bilinear', 'bicubic', 'area', 'lanczos']
  7. default_hooks = dict(
  8. checkpoint=dict(interval=5, save_best='auto', type='CheckpointHook'),
  9. logger=dict(interval=50, type='LoggerHook'),
  10. param_scheduler=dict(type='ParamSchedulerHook'),
  11. sampler_seed=dict(type='DistSamplerSeedHook'),
  12. timer=dict(type='IterTimerHook'),
  13. visualization=dict(type='DetVisualizationHook'))
  14. custom_hooks = [
  15. dict(
  16. type='EMAHook',
  17. ema_type='ExpMomentumEMA',
  18. momentum=0.0001,
  19. update_buffers=True,
  20. priority=49)
  21. ]
  22. default_scope = 'mmdet'
  23. env_cfg = dict(
  24. cudnn_benchmark=False,
  25. dist_cfg=dict(backend='nccl'),
  26. mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
  27. launcher = 'none'
  28. load_from = None
  29. log_level = 'INFO'
  30. log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
  31. max_epochs = 500
  32. metainfo = dict(
  33. classes=('barcode', ), palette=[
  34. (
  35. 220,
  36. 20,
  37. 60,
  38. ),
  39. ])
  40. model = dict(
  41. backbone=dict(
  42. last_stage_out_channels= 768,
  43. plugins = None,
  44. deepen_factor = 0.67,
  45. widen_factor = 0.75,
  46. input_channels = 1,
  47. out_indices = (2, 3, 4),
  48. type='YOLOv8CSPDarknet'),
  49. bbox_head=dict(
  50. num_classes=1,
  51. embed_dims= 256,
  52. #same as hidding dims in decoder and hybridencoder
  53. num_reg_fcs= 2,
  54. sync_cls_avg_factor = False,
  55. loss_cls = dict(
  56. type='VarifocalLoss',
  57. use_sigmoid = True,
  58. alpha= 0.75,
  59. gamma= 2.0,
  60. with_logits=True,
  61. iou_weighted= True,
  62. reduction= 'mean',
  63. loss_weight= 1),
  64. loss_bbox= dict(type='L1Loss', loss_weight=5.0),
  65. loss_iou= dict(type='GIoULoss', loss_weight=2.0),
  66. num_pred_layer = 6,
  67. type='RTDETRHead'),
  68. data_preprocessor=dict(
  69. pad_size_divisor=1,
  70. type='DetDataPreprocessor'),
  71. decoder=dict(
  72. num_classes=1,
  73. hidden_dim=256,
  74. num_queries=300,
  75. position_type='sine',
  76. feat_channels=[256, 256, 256],
  77. #after neck it should be 256
  78. feat_strides=[8,16,32],
  79. #eval size should be same as encoder feat stride
  80. num_levels=3,
  81. num_crossattn_points=4,
  82. number_head=8,
  83. number_decoder_layer=4,
  84. #4 for 34 and 6 for 50
  85. dim_feedforward_ratio=4,
  86. dropout=0.0,
  87. act_cfg= dict(type='ReLU', inplace=True),
  88. num_denoising=100,
  89. label_noise_ratio=0.5,
  90. box_noise_scale=1.0,
  91. learnt_init_query=True,
  92. eval_idx=-1,
  93. eval_spatial_size=[640, 640],
  94. eps=1e-2
  95. ),
  96. neck=dict(
  97. in_channels=[192, 384, 576],
  98. #same as feature from back bone using last four layer
  99. #in_channels=[128,256,512] for resnet34 and [512,1024,2048] for resnet50
  100. feat_strides=[8,16,32],
  101. hidden_dim=256,
  102. n_head=8,
  103. dim_feedforward_ratio=4,
  104. drop_out=0.0,
  105. enc_act=dict(type='GELU'),
  106. use_encoder_idx=[2],
  107. num_encoder_layers=1,
  108. pe_temperature=100*100,
  109. norm_cfg= dict(type='BN', requires_grad=True),
  110. widen_factor=0.5,
  111. deepen_factor=1,
  112. input_proj_cfg=dict(
  113. type='ChannelMapper',
  114. in_channels=[192, 384, 576],
  115. #same as input channel in encoder
  116. kernel_size=1,
  117. out_channels=256,
  118. act_cfg=None,
  119. norm_cfg=dict(type='BN', requires_grad=True),
  120. num_outs=3
  121. ),
  122. eval_spatial_size=[640, 640],
  123. act_cfg= dict(type='SiLU', inplace=True),
  124. type='HybridEncoder'),
  125. num_queries=300,
  126. test_cfg=dict(max_per_img=100),
  127. train_cfg=dict(
  128. assigner=dict(
  129. match_costs=[
  130. dict(type='FocalLossCost', weight=2.0),
  131. dict(box_format='xywh', type='BBoxL1Cost', weight=5.0),
  132. dict(iou_mode='giou', type='IoUCost', weight=2.0),
  133. ],
  134. type='HungarianAssigner')),
  135. type='RtDetr',)
  136. optim_wrapper = dict(
  137. clip_grad=dict(max_norm=10, norm_type=2),
  138. optimizer=dict(
  139. type='SGD',
  140. lr=0.0025,
  141. momentum=0.937,
  142. weight_decay= 0.0005,
  143. nesterov=True,),
  144. type='OptimWrapper')
  145. param_scheduler = [
  146. dict(
  147. type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=max_epochs)
  148. ]
  149. resume = False
  150. test_cfg = dict(type='TestLoop')
  151. test_pipeline = [
  152. dict(backend_args=None,color_type='grayscale', type='LoadImageFromFile'),
  153. dict(type='LoadAnnotations', with_bbox=True),
  154. dict(
  155. type='Resize',
  156. scale=(640, 640),
  157. keep_ratio=False,
  158. interpolation='bicubic'),
  159. dict(
  160. type='PackDetInputs',
  161. meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape','scale_factor')),
  162. ]
  163. test_dataloader = dict(
  164. batch_size=1,
  165. dataset=dict(
  166. ann_file='Val/Val.json',
  167. backend_args=None,
  168. data_prefix=dict(img='Val/'),
  169. data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
  170. metainfo=dict(classes=('barcode', ), palette=[
  171. (
  172. 220,
  173. 20,
  174. 60,
  175. ),
  176. ]),
  177. pipeline=test_pipeline,
  178. test_mode=True,
  179. type='CocoDataset'),
  180. drop_last=False,
  181. num_workers=2,
  182. persistent_workers=True,
  183. sampler=dict(shuffle=False, type='DefaultSampler'))
  184. test_evaluator = dict(
  185. ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val/Val.json',
  186. backend_args=None,
  187. format_only=False,
  188. metric='bbox',
  189. type='CocoMetric')
  190. train_cfg = dict(max_epochs=max_epochs, type='EpochBasedTrainLoop', val_interval=1)
  191. train_pipeline = [
  192. dict(type='LoadImageFromFile', color_type='grayscale',backend_args = None),
  193. dict(type='LoadAnnotations', with_bbox=True),
  194. # dict(
  195. # type='RandomApply',
  196. # transforms=dict(type='PhotoMetricDistortion'),
  197. # prob=0.8),
  198. dict(
  199. type='RandomApply', transforms=dict(type='MinIoURandomCrop'),
  200. prob=0.8),
  201. dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
  202. dict(type='RandomFlip', prob=0.5),
  203. dict(
  204. type='RandomChoice',
  205. transforms=[[
  206. dict(
  207. type='Resize',
  208. scale=(640, 640),
  209. keep_ratio=False,
  210. interpolation=interpolation)
  211. ] for interpolation in interpolations]),
  212. dict(type='PackDetInputs')
  213. ]
  214. train_dataloader = dict(
  215. batch_sampler=dict(type='AspectRatioBatchSampler'),
  216. batch_size=12,
  217. dataset=dict(
  218. ann_file='Train/Train.json',
  219. backend_args=None,
  220. data_prefix=dict(img='Train/'),
  221. data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
  222. filter_cfg=dict(filter_empty_gt=False, min_size=32),
  223. metainfo=dict(classes=('barcode', ), palette=[
  224. (
  225. 220,
  226. 20,
  227. 60,
  228. ),
  229. ]),
  230. pipeline=train_pipeline,
  231. type='CocoDataset'),
  232. num_workers=2,
  233. persistent_workers=True,
  234. sampler=dict(shuffle=True, type='DefaultSampler'))
  235. val_cfg = dict(type='ValLoop')
  236. val_dataloader = dict(
  237. batch_size=1,
  238. dataset=dict(
  239. ann_file='Val/Val.json',
  240. backend_args=None,
  241. data_prefix=dict(img='Val/'),
  242. data_root='../../../media/tricolops/T7/Dataset/coco_format_bd/',
  243. metainfo=dict(classes=('barcode', ), palette=[
  244. (
  245. 220,
  246. 20,
  247. 60,
  248. ),
  249. ]),
  250. pipeline=[
  251. dict(backend_args=None, color_type="grayscale",type='LoadImageFromFile'),
  252. dict(keep_ratio=False, scale=(
  253. 640,
  254. 640,
  255. ), type='Resize'),
  256. dict(type='LoadAnnotations', with_bbox=True),
  257. dict(
  258. meta_keys=(
  259. 'img_id',
  260. 'img_path',
  261. 'ori_shape',
  262. 'img_shape',
  263. 'scale_factor',
  264. ),
  265. type='PackDetInputs'),
  266. ],
  267. test_mode=True,
  268. type='CocoDataset'),
  269. drop_last=False,
  270. num_workers=2,
  271. persistent_workers=True,
  272. sampler=dict(shuffle=False, type='DefaultSampler'))
  273. val_evaluator = dict(
  274. ann_file='../../../media/tricolops/T7/Dataset/coco_format_bd/Val/Val.json',
  275. backend_args=None,
  276. format_only=False,
  277. metric='bbox',
  278. type='CocoMetric')
  279. vis_backends = [
  280. dict(type='LocalVisBackend'),
  281. ]
  282. visualizer = dict(
  283. name='visualizer',
  284. type='DetLocalVisualizer',
  285. vis_backends=[
  286. dict(type='LocalVisBackend'),
  287. ])
  288. work_dir = './work_dirs/rtdetr_yolo'